{\rtf1\ansi\ansicpg1252\uc1 \deff0\deflang1033\deflangfe1031{\fonttbl{\f0\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f1\fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;}
{\f2\fmodern\fcharset0\fprq1{\*\panose 02070309020205020404}Courier New;}{\f3\froman\fcharset2\fprq2{\*\panose 05050102010706020507}Symbol;}{\f4\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times;}
{\f5\fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Helvetica;}{\f6\fmodern\fcharset0\fprq1{\*\panose 00000000000000000000}Courier;}{\f7\fswiss\fcharset0\fprq2{\*\panose 00000000000000000000}Geneva;}
{\f8\froman\fcharset0\fprq2{\*\panose 00000000000000000000}Tms Rmn;}{\f9\fswiss\fcharset0\fprq2{\*\panose 00000000000000000000}Helv;}{\f10\froman\fcharset0\fprq2{\*\panose 00000000000000000000}MS Serif;}
{\f11\fswiss\fcharset0\fprq2{\*\panose 00000000000000000000}MS Sans Serif;}{\f12\froman\fcharset0\fprq2{\*\panose 00000000000000000000}New York;}{\f13\fswiss\fcharset0\fprq2{\*\panose 00000000000000000000}System;}
{\f14\fnil\fcharset2\fprq2{\*\panose 05000000000000000000}Wingdings;}{\f15\fswiss\fcharset0\fprq3{\*\panose 020b0604030504040204}Tahoma;}{\f16\froman\fcharset238\fprq2 Times New Roman CE;}{\f17\froman\fcharset204\fprq2 Times New Roman Cyr;}
{\f19\froman\fcharset161\fprq2 Times New Roman Greek;}{\f20\froman\fcharset162\fprq2 Times New Roman Tur;}{\f21\froman\fcharset186\fprq2 Times New Roman Baltic;}{\f22\fswiss\fcharset238\fprq2 Arial CE;}{\f23\fswiss\fcharset204\fprq2 Arial Cyr;}
{\f25\fswiss\fcharset161\fprq2 Arial Greek;}{\f26\fswiss\fcharset162\fprq2 Arial Tur;}{\f27\fswiss\fcharset186\fprq2 Arial Baltic;}{\f28\fmodern\fcharset238\fprq1 Courier New CE;}{\f29\fmodern\fcharset204\fprq1 Courier New Cyr;}
{\f31\fmodern\fcharset161\fprq1 Courier New Greek;}{\f32\fmodern\fcharset162\fprq1 Courier New Tur;}{\f33\fmodern\fcharset186\fprq1 Courier New Baltic;}}{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;
\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;
\red192\green192\blue192;}{\stylesheet{\widctlpar\adjustright \fs20\lang1031\cgrid \snext0 Normal;}{\*\cs10 \additive Default Paragraph Font;}{\s15\widctlpar\adjustright \fs20\lang1031\cgrid \sbasedon0 \snext15 endnote text;}{\s16\widctlpar\adjustright 
\fs20\lang1031\cgrid \sbasedon0 \snext16 footnote text;}{\*\cs17 \additive \super \sbasedon10 footnote reference;}{\*\cs18 \additive \super \sbasedon10 endnote reference;}}{\info{\author Daffy Duck}{\operator Daffy Duck}
{\creatim\yr1999\mo9\dy10\hr13\min41}{\revtim\yr1999\mo9\dy10\hr13\min41}{\version3}{\edmins0}{\nofpages34}{\nofwords11096}{\nofchars63248}{\*\company ACME}{\nofcharsws77672}{\vern113}}\margl1417\margr1417\margt1417\margb1134 
\widowctrl\endnotes\aendnotes\hyphhotz425\ftnnrlc\aftnnar\hyphcaps0\viewkind4\viewscale100 \fet1\sectd \linex0\headery709\footery709\colsx709\sectdefaultcl {\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang{\pntxta .}}{\*\pnseclvl2
\pnucltr\pnstart1\pnindent720\pnhang{\pntxta .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang{\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang{\pntxta )}}{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta )}}{\*\pnseclvl6
\pnlcltr\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta )}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang
{\pntxtb (}{\pntxta )}}\pard\plain \nowidctlpar\adjustright \fs20\lang1031\cgrid {\f11\cgrid0 \page }{\cs18\super #{\footnote\ftnalt \pard\plain \nowidctlpar\adjustright \fs20\lang1031\cgrid {\cs18\super h}{\f11\fs24\cgrid0 cAbout}}${\footnote\ftnalt 
\pard\plain \nowidctlpar\adjustright \fs20\lang1031\cgrid {\cs18\super A}{\f11\fs24\cgrid0 bout this help file}}}{\b\f11\fs24\cgrid0 About this help file}{\f11\cgrid0 
\par 
\par This file was made with the help of }{\f11\ul\cgrid0 Makertf 3.12b-1}{\v\f11\cgrid0 hcMakertf}{\f11\cgrid0  from the input file manual.texi.
\par 
\par START-INFO-DIR-ENTRY\line * Bzip2: (bzip2).\tab \tab A program and library for data compression.\line END-INFO-DIR-ENTRY
\par 
\par 0mm  2mm
\par 
\par This program, }{\f2\cgrid0 bzip2}{\f11\cgrid0 , and associated library }{\f2\cgrid0 libbzip2}{\f11\cgrid0 , are Copyright (C) 1996-1998 Julian R Seward.  All rights reserved.
\par 
\par Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 
\par }\pard \fi-720\li720\nowidctlpar\tx144\tx720\adjustright {\f11\cgrid0 \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab    Redistributions of source code must retain the above copyright    notice, this list of conditions and the following disclaimer.
\par 
\par \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab    The origin of this software must not be misrepresented; you must    not claim that you wrote th
e original software.  If you use this    software in a product, an acknowledgment in the product    documentation would be appreciated but is not required.
\par 
\par \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab    Altered source versions must be plainly marked as such, and must    not be misrepresented as being the original software.
\par 
\par \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab    The name of the author may not be used to endorse or promote    products derived from this software without specific prior written    permission.
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL D
A
MAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\par 
\par Julian Seward, Guildford, Surrey, UK.
\par 
\par }{\f2\cgrid0 jseward@acm.org}{\f11\cgrid0 
\par 
\par }{\f2\cgrid0 http://www.muraroa.demon.co.uk}{\f11\cgrid0 
\par 
\par }{\f2\cgrid0 bzip2}{\f11\cgrid0 /}{\f2\cgrid0 libbzip2}{\f11\cgrid0  version 0.9.0 of 23 August 1998.
\par 
\par PATENTS: To the best of my knowledge, }{\f2\cgrid0 bzip2}{\f11\cgrid0  does not use any patented algorithms.  However, I do not have the resources available to carry out a full patent search.  Therefore I cannot give any guarantee of the above statement.

\par 
\par }\pard \keepn\nowidctlpar\adjustright {\f11\cgrid0 \page Node: }{\b\f11\cgrid0 Overview}{\f11\cgrid0 , Next: }{\f11\uldb\cgrid0 Implementation}{\v\f11\cgrid0 Implementation}{\f11\cgrid0 , Prev: }{\f11\uldb\cgrid0 Top}{\v\f11\cgrid0 Top}{\f11\cgrid0 , Up: 
}{\f11\uldb\cgrid0 Top}{\v\f11\cgrid0 Top}{\f11\cgrid0 \line }{\cs18\super K{\footnote\ftnalt \pard\plain \nowidctlpar\adjustright \fs20\lang1031\cgrid {\cs18\super K}{\f11\fs24\cgrid0  Overview}}#{\footnote\ftnalt \pard\plain \nowidctlpar\adjustright 
\fs20\lang1031\cgrid {\cs18\super O}{\f11\fs24\cgrid0 verview}}${\footnote\ftnalt \pard\plain \nowidctlpar\adjustright \fs20\lang1031\cgrid {\cs18\super I}{\f11\fs24\cgrid0 ntroduction}}}{\f11\cgrid0 
\par }\pard \nowidctlpar\adjustright {\b\f11\fs24\cgrid0 Introduction}{\f11\cgrid0 
\par 
\par }{\f2\cgrid0 bzip2}{\f11\cgrid0   compresses  files  using the Burrows-Wheeler block-sorting text compression algorithm,  and  Huffman  coding.  Compression  is  generally  consider
ably  better than that achieved by more conventional LZ77/LZ78-based compressors, and  approaches  the performance of the PPM family of statistical compressors.
\par 
\par }{\f2\cgrid0 bzip2}{\f11\cgrid0  is built on top of }{\f2\cgrid0 libbzip2}{\f11\cgrid0 , a flexible library for handling compressed data in the }{\f2\cgrid0 bzip2}{\f11\cgrid0 
 format.  This manual describes both how to use the program and how to work with the library interface.  Most of the manual is devoted to this library, not the program, which is good news if your interest is only in the program.
\par 
\par Chapter 2 describes how to use }{\f2\cgrid0 bzip2}{\f11\cgrid0 
; this is the only part you need to read if you just want to know how to operate the program.  Chapter 3 describes the programming interfaces in detail, and Chapter 4 records some miscellaneous notes which I thought ought to be recorded somewhere.
\par 
\par }{\b\f11\cgrid0 How to use }{\b\f2\cgrid0 bzip2}{\f11\cgrid0 
\par 
\par This chapter contains a copy of the }{\f2\cgrid0 bzip2}{\f11\cgrid0  man page, and nothing else.  
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0 NAME\line        bzip2, bunzip2 - a block-sorting file compressor, v0.9.0\line        bzcat - decompresses files to stdout\line        bzip2recover - recovers data from damaged bzip2 files\line \line 
\line SYNOPSIS\line        bzip2 [ -cdfkstvzVL123456789 ] [ filenames ...  ]\line        bunzip2 [ -fkvsVL ] [ filenames ...  ]\line        bzcat [ -s ] [ filenames ...  ]\line        bzip2recover filename\line \line \line DESCRIPTION\line 
       bzip2  compresses  files  using the Burrows-Wheeler block-\line        sorting text compression algorithm,  and  Huffman  coding.\line        Compression  is  generally  considerably  better than that\line 
       achieved by more conventional LZ77/LZ78-based compressors,\line        and  approaches  the performance of the PPM family of sta-\line        tistical compressors.\line \line        The command-line options are deliberately very similar  to\line 
       those of GNU Gzip, but they are not identical.\line \line        bzip2  expects  a list of file names to accompany the com-\line        mand-line flags.  Each file is replaced  by  a  compressed\line 
       version  of  itself,  with  the  name "original_name.bz2".\line        Each compressed file has the same  modification  date  and\line        permissions  as  the corresponding original, so that these\line 
       properties can  be  correctly  restored  at  decompression\line        time.  File name handling is naive in the sense that there\line        is no mechanism for preserving original file  names,  per-\line        mission
s  and  dates  in filesystems which lack these con-\line        cepts, or have serious file name length restrictions, such\line        as MS-DOS.\line \line        bzip2  and  bunzip2 will by default not overwrite existing\line 
       files; if you want this to happen, specify the -f flag.\line \line        If no file names  are  specified,  bzip2  compresses  from\line        standard  input  to  standard output.  In this case, bzip2\line 
       will decline to write compressed output to a terminal,  as\line        this  would  be  entirely  incomprehensible  and therefore\line        pointless.\line \line        bunzip2 (or bzip2 -d ) decompresses and restores all spec-\line 
       ified files whose names end in ".bz2".  Files without this\line        suffix are ignored.  Again, supplying no filenames  causes\line        decompression from standard input to standard output.\line \line 
       bunzip2 will correctly decompress a file which is the con-\line        catenation of two or more compressed files.  The result is\line        the concatenation of the corresponding uncompressed files.\line        Integrity
 testing (-t) of concatenated compressed files is\line        also supported.\line \line        You  can also compress or decompress files to the standard\line        output by giving the -c flag.  Multiple files may be  com-\line 
       pressed and decompressed like this.  The resulting outputs\line        are fed sequentially to stdout.  Compression  of  multiple\line        files  in this manner generates a stream containing multi-\line 
       ple compressed file representations.  Such a stream can be\line        decompressed  correctly  only  by  bzip2  version 0.9.0 or\line        later.  Earlier versions of bzip2 will stop  after  decom-\line 
       pressing the first file in the stream.\line \line        bzcat  (or bzip2 -dc ) decompresses all specified files to\line        the standard output.\line \line        Compression is always performed, even  if  the  compressed\line 
       file  is slightly larger than the original.  Files of less\line        than about one hundred bytes tend to get larger, since the\line        compression  mechanism  has  a  constant  overhead  in the\line        region of
 50 bytes.  Random data (including the output  of\line        most  file  compressors)  is  coded at about 8.05 bits per\line        byte, giving an expansion of around 0.5%.\line \line        As a self-check for your  protection,  bzip2  uses  32-bit
\line        CRCs  to make sure that the decompressed version of a file\line        is identical to the original.  This guards against corrup-\line        tion  of  the compressed data, and against undetected bugs\line 
       in bzip2 (hopefully very unlikely).  The chances  of  data\line        corruption  going  undetected  are microscopic,  about one\line        chance in four billion for each file processed.  Be aware,\line 
       though,  that  the  check occurs upon decompression, so it\line        can  only  tell  you  that  something is wrong.  It  can't\line        help  you recover the original uncompressed data.  You can\line 
       use bzip2recover to  try  to  recover  data  from  damaged\line        files.\line \line        Return  values:  0  for a normal exit, 1 for environmental\line        problems (file not found, invalid flags, I/O errors,  &c),\line 
       2 to indicate a corrupt compressed file, 3 for an internal\line        consistency error (eg, bug) which caused bzip2 to panic.\line \line \line MEMORY MANAGEMENT\line        Bzip2 compresses large files in blocks.   The  block  size\line 
       affects  both  the  compression  ratio  achieved,  and the\line        amount of memory needed both for  compression  and  decom-\line        pression.   The flags -1 through -9 specify the block size\line 
       to be 100,000 bytes through 900,000  bytes  (the  default)\line        respectively.   At decompression-time, the block size used\line        for compression is read from the header of the  compressed\line 
       file, and bunzip2 then allocates itself just enough memory\line        to decompress the file.  Since block sizes are  stored  in\line        compressed  files,  it follows that the flags -1 to -9 are\line 
       irrelevant  to  and  so  ignored   during   decompression.\line \line        Compression  and decompression requirements, in bytes, can\line        be estimated as:\line \line              Compression:   400k + ( 7 x block size )\line \line 
             Decompression: 100k + ( 4 x block size ), or\line                             100k + ( 2.5 x block size )\line \line        Larger  block  sizes  give  rapidly  diminishing  marginal\line 
       returns;  most of the compression comes from the first two\line        or three hundred k of block size, a fact worth bearing  in\line        mind  when  using  bzip2  on  small  machines.  It is also\line 
       important to  appreciate  that  the  decompression  memory\line        requirement  is  set  at compression-time by the choice of\line        block size.\line \line        For files compressed with the  default  900k  block  size,\line 
       bunzip2  will require about 3700 kbytes to decompress.  To\line        support decompression of any file on a 4 megabyte machine,\line        bunzip2  has  an  option to decompress using approximately\line 
       half this amount of memory, about 2300 kbytes.  Decompres-\line        sion  speed  is also halved, so you should use this option\line        only where necessary.  The relevant flag is -s.\line \line        In general, try and 
use the largest block size memory con-\line        straints  allow,  since  that  maximises  the  compression\line        achieved.  Compression and decompression speed are  virtu-\line        ally unaffected by block size.\line \line 
       Another  significant point applies to files which fit in a\line        single block -- that  means  most  files  you'd  encounter\line        using  a  large  block  size.   The  amount of real memory\line 
       touched is proportional to the size of the file, since the\line        file  is smaller than a block.  For example, compressing a\line        file 20,000 bytes long with the flag  -9  will  cause  the\line 
       compressor  to  allocate  around 6700k of memory, but only\line        touch 400k + 20000 * 7 = 540 kbytes of it.  Similarly, the\line        decompressor  will  allocate  3700k  but only touch 100k +\line        20000 * 4 = 180 kbytes.\line \line 
       Here is a table which summarises the maximum memory  usage\line        for  different  block  sizes.   Also recorded is the total\line        compressed size for 14 files of the Calgary Text  Compres-\line 
       sion  Corpus totalling 3,141,622 bytes.  This column gives\line        some feel for how  compression  varies  with  block  size.\line        These  figures  tend to understate the advantage of larger\line 
       block sizes for larger files, since the  Corpus  is  domi-\line        nated by smaller files.\line \line                   Compress   Decompress   Decompress   Corpus\line            Flag     usage      usage       -s usage     Size\line \line 
            -1      1100k       500k         350k      914704\line             -2      1800k       900k         600k      877703\line             -3      2500k      1300k         850k      860338\line 
            -4      3200k      1700k        1100k      846899\line             -5      3900k      2100k        1350k      845160\line             -6      4600k      2500k        1600k      838626\line 
            -7      5400k      2900k        1850k      834096\line             -8      6000k      3300k        2100k      828642\line             -9      6700k      3700k        2350k      828642\line \line \line OPTIONS\line        -c --stdout\line 
              Compress or decompress to standard output.  -c will\line               decompress multiple files to stdout, but will  only\line               compress a single file to stdout.\line \line        -d --decompress\line 
              Force  decompression.  bzip2, bunzip2 and bzcat are\line               really the same program,  and  the  decision  about\line               what  actions to take is done on the basis of which\line 
              name is used.  This flag overrides that  mechanism,\line               and forces bzip2 to decompress.\line \line        -z --compress\line               The  complement  to -d: forces compression, regard-\line 
              less of the invocation name.\line \line        -t --test\line               Check integrity of the specified file(s), but don't\line               decompress  them.   This  really  performs  a trial\line 
              decompression and throws away the result.\line \line        -f --force\line               Force overwrite of output files.   Normally,  bzip2\line               will not overwrite existing output files.\line \line        -k --keep\line 
              Keep  (don't delete) input files during compression\line               or decompression.\line \line        -s --small\line               Reduce memory usage, for compression, decompression\line 
              and  testing.   Files  are  decompressed and tested\line               using a modified algorithm which only requires  2.5\line               bytes  per  block byte.  This means any file can be\line 
              decompressed in 2300k of memory,  albeit  at  about\line               half the normal speed.\line \line               During  compression,  -s  selects  a  block size of\line               200k, which limits memory use to  around  the  same
\line               figure,  at  the expense of your compression ratio.\line               In short, if your  machine  is  low  on  memory  (8\line               megabytes  or  less),  use  -s for everything.  See\line 
              MEMORY MANAGEMENT above.\line \line        -v --verbose\line               Verbose mode -- show the compression ratio for each\line               file  processed.   Further  -v's  increase the ver-\line               bosity level, spewing ou
t lots of information which\line               is primarily of interest for diagnostic purposes.\line \line        -L --license -V --version\line               Display  the  software  version,  license terms and\line               conditions.\line \line 
       -1 to -9\line               Set the block size to 100 k, 200 k ..  900  k  when\line               compressing.   Has  no  effect  when decompressing.\line               See MEMORY MANAGEMENT above.\line \line        --repetitive-fast\line 
              bzip2 injects some small  pseudo-random  variations\line               into  very  repetitive  blocks  to limit worst-case\line               performance during compression.   If  sorting  runs\line 
              into  difficulties,  the  block  is randomised, and\line               sorting is restarted.  Very roughly, bzip2 persists\line               for  three  times  as  long as a well-behaved input\line 
              would take before resorting to randomisation.  This\line               flag makes it give up much sooner.\line \line        --repetitive-best\line               Opposite  of  --repetitive-fast;  try  a lot harder\line 
              before resorting to randomisation.\line \line \line RECOVERING DATA FROM DAMAGED FILES\line        bzip2 compresses files in blocks, usually 900kbytes  long.\line        Each block is handled independently.  If a media or trans-\line 
       mission error causes a multi-block  .bz2  file  to  become\line        damaged,  it  may  be  possible  to  recover data from the\line        undamaged blocks in the file.\line \line        The compressed representation of each block  is  delimited
\line        by  a  48-bit pattern, which makes it possible to find the\line        block boundaries with reasonable  certainty.   Each  block\line        also  carries its own 32-bit CRC, so damaged blocks can be\line 
       distinguished from undamaged ones.\line \line        bzip2recover is a  simple  program  whose  purpose  is  to\line        search  for blocks in .bz2 files, and write each block out\line 
       into its own .bz2 file.  You can then use bzip2 -t to test\line        the integrity of the resulting files, and decompress those\line        which are undamaged.\line \line        bzip2recover takes a single argument, the name of the dam-\line 
       aged file, and writes a number of files "rec0001file.bz2",\line        "rec0002file.bz2", etc, containing the  extracted  blocks.\line        The  output  filenames  are  designed  so  that the use of\line        wildca
rds in subsequent processing -- for example,  "bzip2\line        -dc  rec*file.bz2  > recovered_data" -- lists the files in\line        the "right" order.\line \line        bzip2recover should be of most use dealing with large .bz2\line 
       files,  as  these will contain many blocks.  It is clearly\line        futile to use it on damaged single-block  files,  since  a\line        damaged  block  cannot  be recovered.  If you wish to min-\line 
       imise any potential data loss through media  or  transmis-\line        sion errors, you might consider compressing with a smaller\line        block size.\line \line \line PERFORMANCE NOTES\line 
       The sorting phase of compression gathers together  similar\line        strings  in  the  file.  Because of this, files containing\line        very long runs of  repeated  symbols,  like  "aabaabaabaab\line 
       ..."   (repeated   several  hundred  times)  may  compress\line        extraordinarily slowly.  You can use the -vvvvv option  to\line        monitor progress in great detail, if you want.  Decompres-\line        sion speed is unaffected.\line 
\line        Such pathological cases seem rare in  practice,  appearing\line        mostly in artificially-constructed test files, and in low-\line        level disk images.  It may be inadvisable to use bzip2  to\line 
       compress  the  latter.   If you do get a file which causes\line        severe slowness in compression, try making the block  size\line        as small as possible, with flag -1.\line \line 
       bzip2  usually  allocates  several  megabytes of memory to\line        operate in, and then charges all over it in a fairly  ran-\line        dom  fashion.   This means that performance, both for com-\line 
       pressing and decompressing, is largely determined  by  the\line        speed  at  which  your  machine  can service cache misses.\line        Because of this, small changes to the code to  reduce  the\line        miss
  rate  have  been observed to give disproportionately\line        large performance improvements.  I imagine bzip2 will per-\line        form best on machines with very large caches.\line \line \line CAVEATS\line 
       I/O  error  messages  are not as helpful as they could be.\line        Bzip2 tries hard to detect I/O errors  and  exit  cleanly,\line        but  the  details  of  what  the problem is sometimes seem\line        rather misleading.\line \line 
       This manual page pertains to version 0.9.0 of bzip2.  Com-\line        pressed  data created by this version is entirely forwards\line        and backwards compatible with the previous public release,\line 
       version  0.1pl2,  but  with the following exception: 0.9.0\line        can correctly decompress multiple concatenated  compressed\line        files.   0.1pl2  cannot do this; it will stop after decom-\line 
       pressing just the first file in the stream.\line \line        Wildcard expansion for Windows 95 and NT is flaky.\line \line        bzip2recover uses 32-bit integers to represent  bit  posi-\line        tions  in compressed files, so it c
annot handle compressed\line        files more than 512 megabytes long.  This could easily  be\line        fixed.\line \line \line AUTHOR\line        Julian Seward, jseward@acm.org.\line \line 
       The ideas embodied in bzip2 are due to (at least) the fol-\line        lowing people: Michael Burrows and David Wheeler (for  the\line        block  sorting  transformation), David Wheeler (again, for\line 
       the Huffman coder), Peter Fenwick (for the structured cod-\line        ing model in the original bzip, and many refinements), and\line        Alistair Moffat, Radford Neal  and  Ian  Witten  (for  the\line 
       arithmetic  coder  in  the  original  bzip).   I  am  much\line        indebted for their help, support and advice.  See the man-\line        ual  in the source distribution for pointers to sources of\line        documentation.
  Christian von Roques encouraged me to look\line        for  faster sorting algorithms, so as to speed up compres-\line        sion.  Bela Lubkin encouraged me to improve the worst-case\line 
       compression performance.  Many people sent patches, helped\line        with portability problems, lent machines, gave advice  and\line        were generally helpful.
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f11\cgrid0 Programming with }{\b\f2\cgrid0 libbzip2}{\f11\cgrid0 
\par 
\par This chapter describes the programming interface to }{\f2\cgrid0 libbzip2}{\f11\cgrid0 .
\par 
\par For general background information, particularly about memory use and performance aspects, you'd be well advised to read Chapter 2 as well.
\par 
\par }{\b\f11\cgrid0 Top-level structure}{\f11\cgrid0 
\par 
\par }{\f2\cgrid0 libbzip2}{\f11\cgrid0  is a flexible library for compressing and decompressing data in the }{\f2\cgrid0 bzip2}{\f11\cgrid0 
 data format.  Although packaged as a single entity, it helps to regard the library as three separate parts: the low-level interface, the high-level interface, and some utility functions.
\par 
\par The structure of }{\f2\cgrid0 libbzip2}{\f11\cgrid0 's interfaces is similar to that of Jean-loup Gailly's and Mark Adler's excellent }{\f2\cgrid0 zlib}{\f11\cgrid0  library.
\par 
\par }{\b\f11\cgrid0 Low-level summary}{\f11\cgrid0 
\par 
\par This interface provides services for compressing and decompressing data in memory.  There's no provision for dealing with files, streams or any other I/O mechanisms, just straigh
t memory-to-memory work.  In fact, this part of the library can be compiled without inclusion of }{\f2\cgrid0 stdio.h}{\f11\cgrid0 , which may be helpful for embedded applications.
\par 
\par The low-level part of the library has no global variables and is therefore thread-safe.
\par 
\par Six routines make up the low level interface: }{\f2\cgrid0 bzCompressInit}{\f11\cgrid0 , }{\f2\cgrid0 bzCompress}{\f11\cgrid0 , and
 }{\f2\cgrid0 bzCompressEnd}{\f11\cgrid0  for compression, and a corresponding trio }{\f2\cgrid0 bzDecompressInit}{\f11\cgrid0 ,
 }{\f2\cgrid0 bzDecompress}{\f11\cgrid0  and }{\f2\cgrid0 bzDecompressEnd}{\f11\cgrid0  for decompression.  The }{\f2\cgrid0 *Init}{\f11\cgrid0  functions allocate memory for compression/decompression and do other initialisations, whilst the }{\f2\cgrid0 
*End}{\f11\cgrid0  functions close down operations and release memory.
\par 
\par The real work is done by }{\f2\cgrid0 bzCompress}{\f11\cgrid0  and }{\f2\cgrid0 bzDecompress}{\f11\cgrid0 .  These compress/decompress data from a user-supplied input buffer to a user-supplied output buffer. 
 These buffers can be any size; arbitrary quantities of data are handled by making repeated calls to these functions.  This is a flexible mechanism allowing a consumer-pull style of activity, or producer-push, or a mixture of both.
\par 
\par }{\b\f11\cgrid0 High-level summary}{\f11\cgrid0 
\par 
\par This interface provides some handy wrappers around the low-level interface to facilitate reading and writing }{\f2\cgrid0 bzip2}{\f11\cgrid0  format files (}{\f2\cgrid0 .bz2}{\f11\cgrid0 
 files).  The routines provide hooks to facilitate reading files in which the }{\f2\cgrid0 bzip2}{\f11\cgrid0  data stream is embedded within some larger-scale file structure, or where there are multiple }{\f2\cgrid0 bzip2}{\f11\cgrid0 
 data streams concatenated end-to-end.
\par 
\par For reading files, }{\f2\cgrid0 bzReadOpen}{\f11\cgrid0 , }{\f2\cgrid0 bzRead}{\f11\cgrid0 , }{\f2\cgrid0 bzReadClose}{\f11\cgrid0  and }{\f2\cgrid0 bzReadGetUnused}{\f11\cgrid0  are supplied.  For writing files, }{\f2\cgrid0 bzWriteOpen}{\f11\cgrid0 , }{
\f2\cgrid0 bzWrite}{\f11\cgrid0  and }{\f2\cgrid0 bzWriteClose}{\f11\cgrid0  are available.
\par 
\par As with the low-level library, no global variables are used so the library is per se thread-safe.  However, if I/O errors occur whilst reading or writing the underlying compressed files, you may have to consult }{\f2\cgrid0 errno}{\f11\cgrid0 
 to determine the cause of the error.  In that case, you'd need a C library which correctly supports }{\f2\cgrid0 errno}{\f11\cgrid0  in a multithreaded environment.
\par 
\par To make the library a little simpler and more portable, }{\f2\cgrid0 bzReadOpen}{\f11\cgrid0  and }{\f2\cgrid0 bzWriteOpen}{\f11\cgrid0  require you to pass them file handles (}{\f2\cgrid0 FILE*}{\f11\cgrid0 s) which have previously been
 opened for reading or writing respectively.  That avoids portability problems associated with file operations and file attributes, whilst not being much of an imposition on the programmer.
\par 
\par }{\b\f11\cgrid0 Utility functions summary}{\f11\cgrid0 
\par 
\par For very simple needs, }{\f2\cgrid0 bzBuffToBuffCompress}{\f11\cgrid0  and }{\f2\cgrid0 bzBuffToBuffDecompress}{\f11\cgrid0 
 are provided.  These compress data in memory from one buffer to another buffer in a single function call.  You should assess whether these functions fulfill your memory-to-memory compression/decompression req
uirements before investing effort in understanding the more general but more complex low-level interface.
\par 
\par Yoshioka Tsuneo (}{\f2\cgrid0 QWF00133@niftyserve.or.jp}{\f11\cgrid0  / }{\f2\cgrid0 tsuneo-y@is.aist-nara.ac.jp}{\f11\cgrid0 ) has contributed some functions to give better }{\f2\cgrid0 zlib}{\f11\cgrid0  compatibility.  These functions are }{\f2\cgrid0 
bzopen}{\f11\cgrid0 , }{\f2\cgrid0 bzread}{\f11\cgrid0 , }{\f2\cgrid0 bzwrite}{\f11\cgrid0 , }{\f2\cgrid0 bzflush}{\f11\cgrid0 , }{\f2\cgrid0 bzclose}{\f11\cgrid0 , }{\f2\cgrid0 bzerror}{\f11\cgrid0  and }{\f2\cgrid0 bzlibVersion}{\f11\cgrid0 
.  You may find these functions more convenient for simple file reading and writing, than those in the high-level interface.  These functions are not (y
et) officially part of the library, and are not further documented here.  If they break, you get to keep all the pieces.  I hope to document them properly when time permits.
\par 
\par Yoshioka also contributed modifications to allow the library to be built as a Windows DLL.
\par 
\par }{\b\f11\cgrid0 Error handling}{\f11\cgrid0 
\par 
\par The library is designed to recover cleanly in all situations, including the worst-case situation of decompressing random data.  I'm not 100% sure that it can always do this, so you might want to add a signal handler to catch segm
entation violations during decompression if you are feeling especially paranoid.  I would be interested in hearing more about the robustness of the library to corrupted compressed data.
\par 
\par The file }{\f2\cgrid0 bzlib.h}{\f11\cgrid0  contains all definitions needed to use the library.  In particular, you should definitely not include }{\f2\cgrid0 bzlib_private.h}{\f11\cgrid0 .
\par 
\par In }{\f2\cgrid0 bzlib.h}{\f11\cgrid0 , the various return values are defined.  The following list is not intended as an exhaustive description of the circumstances in which a given value may be returned - those desc
riptions are given later.  Rather, it is intended to convey the rough meaning of each return value.  The first five values are normal and not intended to denote an error situation.  
\par }{\f2\cgrid0 BZ_OK}{\f11\cgrid0 
\par }\pard \li720\nowidctlpar\adjustright {\f11\cgrid0 The requested action was completed successfully.
\par 
\par }\pard \nowidctlpar\adjustright {\f2\cgrid0 BZ_RUN_OK}{\f11\cgrid0 
\par }{\f2\cgrid0 BZ_FLUSH_OK}{\f11\cgrid0 
\par }{\f2\cgrid0 BZ_FINISH_OK}{\f11\cgrid0 
\par }\pard \li720\nowidctlpar\adjustright {\f11\cgrid0 In }{\f2\cgrid0 bzCompress}{\f11\cgrid0 , the requested flush/finish/nothing-special action was completed successfully.
\par 
\par }\pard \nowidctlpar\adjustright {\f2\cgrid0 BZ_STREAM_END}{\f11\cgrid0 
\par }\pard \li720\nowidctlpar\adjustright {\f11\cgrid0 Compression of data was completed, or the logical stream end was detected during decompression.
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par The following return values indicate an error of some kind.  
\par }{\f2\cgrid0 BZ_SEQUENCE_ERROR}{\f11\cgrid0 
\par }\pard \li720\nowidctlpar\adjustright {\f11\cgrid0 When using the library, it is important to call the functions in the correct sequence and with data structures (buffers etc) in the correct states.  }{\f2\cgrid0 libbzip2}{\f11\cgrid0 
 checks as much as it can to ensure this is happening, and returns }{\f2\cgrid0 BZ_SEQUENCE_ERROR}{\f11\cgrid0 
 if not.  Code which complies precisely with the function semantics, as detailed below, should never receive this value; such an event denotes buggy code which you should investigate.
\par 
\par }\pard \nowidctlpar\adjustright {\f2\cgrid0 BZ_PARAM_ERROR}{\f11\cgrid0 
\par }\pard \li720\nowidctlpar\adjustright {\f11\cgrid0 Returned when a parameter to a function call is out of range or otherwise manifestly incorrect.  As with }{\f2\cgrid0 BZ_SEQUENCE_ERROR}{\f11\cgrid0 
, this denotes a bug in the client code.  The distinction between }{\f2\cgrid0 BZ_PARAM_ERROR}{\f11\cgrid0  and }{\f2\cgrid0 BZ_SEQUENCE_ERROR}{\f11\cgrid0  is a bit hazy, but still worth making.
\par 
\par }\pard \nowidctlpar\adjustright {\f2\cgrid0 BZ_MEM_ERROR}{\f11\cgrid0 
\par }\pard \li720\nowidctlpar\adjustright {\f11\cgrid0 Returned when a request to allocate memory failed.  Note that the quantity of memory needed to decompress a stream cannot be determined until the stream's header has been read.  So }{\f2\cgrid0 
bzDecompress}{\f11\cgrid0  and }{\f2\cgrid0 bzRead}{\f11\cgrid0  may return }{\f2\cgrid0 BZ_MEM_ERROR}{\f11\cgrid0  even though some of the compressed data has been read.  The same is not true for compression; once }{\f2\cgrid0 bzCompressInit}{
\f11\cgrid0  or }{\f2\cgrid0 bzWriteOpen}{\f11\cgrid0  have successfully completed, }{\f2\cgrid0 BZ_MEM_ERROR}{\f11\cgrid0  cannot occur.
\par 
\par }\pard \nowidctlpar\adjustright {\f2\cgrid0 BZ_DATA_ERROR}{\f11\cgrid0 
\par }\pard \li720\nowidctlpar\adjustright {\f11\cgrid0 Returned when a data integrity error is detected during decom
pression.  Most importantly, this means when stored and computed CRCs for the data do not match.  This value is also returned upon detection of any other anomaly in the compressed data.
\par 
\par }\pard \nowidctlpar\adjustright {\f2\cgrid0 BZ_DATA_ERROR_MAGIC}{\f11\cgrid0 
\par }\pard \li720\nowidctlpar\adjustright {\f11\cgrid0 As a special case of }{\f2\cgrid0 BZ_DATA_ERROR}{\f11\cgrid0 , it is sometimes useful to know when the compressed stream does not start with the correct magic bytes (}{\f2\cgrid0 'B' 'Z' 'h'}{\f11\cgrid0 
).
\par 
\par }\pard \nowidctlpar\adjustright {\f2\cgrid0 BZ_IO_ERROR}{\f11\cgrid0 
\par }\pard \li720\nowidctlpar\adjustright {\f11\cgrid0 Returned by }{\f2\cgrid0 bzRead}{\f11\cgrid0  and }{\f2\cgrid0 bzWrite}{\f11\cgrid0  when there is an error reading or writing in the compressed file, and by }{\f2\cgrid0 bzReadOpen}{\f11\cgrid0  and }{
\f2\cgrid0 bzWriteOpen}{\f11\cgrid0  for attempts to use a file for which the error indicator (viz, }{\f2\cgrid0 ferror(f)}{\f11\cgrid0 ) is set.  On receipt of }{\f2\cgrid0 BZ_IO_ERROR}{\f11\cgrid0 , the caller should consult }{\f2\cgrid0 errno}{
\f11\cgrid0  and/or }{\f2\cgrid0 perror}{\f11\cgrid0  to acquire operating-system specific information about the problem.
\par 
\par }\pard \nowidctlpar\adjustright {\f2\cgrid0 BZ_UNEXPECTED_EOF}{\f11\cgrid0 
\par }\pard \li720\nowidctlpar\adjustright {\f11\cgrid0 Returned by }{\f2\cgrid0 bzRead}{\f11\cgrid0  when the compressed file finishes before the logical end of stream is detected.
\par 
\par }\pard \nowidctlpar\adjustright {\f2\cgrid0 BZ_OUTBUFF_FULL}{\f11\cgrid0 
\par }\pard \li720\nowidctlpar\adjustright {\f11\cgrid0 Returned by }{\f2\cgrid0 bzBuffToBuffCompress}{\f11\cgrid0  and }{\f2\cgrid0 bzBuffToBuffDecompress}{\f11\cgrid0  to indicate that the output data will not fit into the output buffer provided.
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f11\cgrid0 Low-level interface}{\f11\cgrid0 
\par 
\par }{\b\f2\cgrid0 bzCompressInit}{\f11\cgrid0 
\par 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0 typedef\line    struct \{\line       char *next_in;\line       unsigned int avail_in;\line       unsigned int total_in;\line \line       char *next_out;\line       unsigned int avail_out;\line 
      unsigned int total_out;\line \line       void *state;\line \line       void *(*bzalloc)(void *,int,int);\line       void (*bzfree)(void *,void *);\line       void *opaque;\line    \}\line    bz_stream;\line \line 
int bzCompressInit ( bz_stream *strm,\line                      int blockSize100k,\line                      int verbosity,\line                      int workFactor );\line 
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par Prepares for compression.  The }{\f2\cgrid0 bz_stream}{\f11\cgrid0  structure holds all data pertaining to the compression activity.  A }{\f2\cgrid0 bz_stream}{\f11\cgrid0  structure should be allocated and initialised prior to the call.  The fields of }{
\f2\cgrid0 bz_stream}{\f11\cgrid0  comprise the entirety of the user-visible data.  }{\f2\cgrid0 state}{\f11\cgrid0  is a pointer to the private data structures required for compression.
\par 
\par Custom memory allocators are supported, via fields }{\f2\cgrid0 bzalloc}{\f11\cgrid0 , }{\f2\cgrid0 bzfree}{\f11\cgrid0 , and }{\f2\cgrid0 opaque}{\f11\cgrid0 .  The value }{\f2\cgrid0 opaque}{\f11\cgrid0 
 is passed as the first argument to all calls to }{\f2\cgrid0 bzalloc}{\f11\cgrid0  and }{\f2\cgrid0 bzfree}{\f11\cgrid0 , but is otherwise ignored by the library.  The call }{\f2\cgrid0 bzalloc ( opaque, n, m )}{\f11\cgrid0 
 is expected to return a pointer }{\f2\cgrid0 p}{\f11\cgrid0  to }{\f2\cgrid0 n * m}{\f11\cgrid0  bytes of memory, and }{\f2\cgrid0 bzfree ( opaque, p )}{\f11\cgrid0  should free that memory.
\par 
\par If you don't want to use a custom memory allocator, set }{\f2\cgrid0 bzalloc}{\f11\cgrid0 , }{\f2\cgrid0 bzfree}{\f11\cgrid0  and }{\f2\cgrid0 opaque}{\f11\cgrid0  to }{\f2\cgrid0 NULL}{\f11\cgrid0 , and the library will then use the standard }{
\f2\cgrid0 malloc}{\f11\cgrid0 /}{\f2\cgrid0 free}{\f11\cgrid0  routines.
\par 
\par Before calling }{\f2\cgrid0 bzCompressInit}{\f11\cgrid0 , fields }{\f2\cgrid0 bzalloc}{\f11\cgrid0 , }{\f2\cgrid0 bzfree}{\f11\cgrid0  and }{\f2\cgrid0 opaque}{\f11\cgrid0 
 should be filled appropriately, as just described.  Upon return, the internal state will have been allocated and initialised, and }{\f2\cgrid0 total_in}{\f11\cgrid0  and }{\f2\cgrid0 total_out}{\f11\cgrid0 
 will have been set to zero.  These last two fields are used by the library to inform the caller of the total amount of data passed into and out of the library, respectively.  You should not try to change them.
\par 
\par Parameter }{\f2\cgrid0 blockSize100k}{\f11\cgrid0 
 specifies the block size to be used for compression.  It should be a value between 1 and 9 inclusive, and the actual block size used is 100000 x this figure.  9 gives the best compression but takes most memory.
\par 
\par Parameter }{\f2\cgrid0 verbosity}{\f11\cgrid0  should be set to a number between 0 and 4 inclusive.  0 is silent, and greater numbers give increasingly verbose monitoring/debugging output.  If the library has been compiled with }{\f2\cgrid0 -DBZ_NO_STDIO}
{\f11\cgrid0 , no such output will appear for any verbosity setting.
\par 
\par Parameter }{\f2\cgrid0 workFactor}{\f11\cgrid0 
 controls how the compression phase behaves when presented with worst case, highly repetitive, input data.  If compression runs into difficulties caused by repetitive data, some pseudo-random variations are inserted into the block, a
nd compression is restarted.  Lower values of }{\f2\cgrid0 workFactor}{\f11\cgrid0 
 reduce the tolerance of compression to repetitive data.  You should set this parameter carefully; too low, and compression ratio suffers, too high, and your average-to-worst case compression times ca
n become very large.  The default value of 30 gives reasonable behaviour over a wide range of circumstances.
\par 
\par Allowable values range from 0 to 250 inclusive.  0 is a special case, equivalent to using the default value of 30.
\par 
\par Note that the randomisation pr
ocess is entirely transparent.  If the library decides to randomise and restart compression on a block, it does so without comment.  Randomised blocks are automatically de-randomised during decompression, so data integrity is never compromised.
\par 
\par Possible return values: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       BZ_PARAM_ERROR\line          if strm is NULL\line          or blockSize100k < 1 or blockSize100k > 9\line          or verbosity < 0 or verbosity > 4\line          or workFactor < 0 or workFactor > 250
\line       BZ_MEM_ERROR\line          if not enough memory is available\line       BZ_OK\line          otherwise
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Allowable next actions: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       bzCompress\line          if BZ_OK is returned\line       no specific action needed in case of error
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f2\cgrid0 bzCompress}{\f11\cgrid0 
\par 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0    int bzCompress ( bz_stream *strm, int action );
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Provides more input and/or output buffer space for the library.  The caller maintains input and output buffers, and calls }{\f2\cgrid0 bzCompress}{\f11\cgrid0  to transfer data between them.
\par 
\par Before each call to }{\f2\cgrid0 bzCompress}{\f11\cgrid0 , }{\f2\cgrid0 next_in}{\f11\cgrid0  should point at the data to be compressed, and }{\f2\cgrid0 avail_in}{\f11\cgrid0  should indicate how many bytes the library may read.  }{\f2\cgrid0 bzCompress}
{\f11\cgrid0  updates }{\f2\cgrid0 next_in}{\f11\cgrid0 , }{\f2\cgrid0 avail_in}{\f11\cgrid0  and }{\f2\cgrid0 total_in}{\f11\cgrid0  to reflect the number of bytes it has read.
\par 
\par Similarly, }{\f2\cgrid0 next_out}{\f11\cgrid0  should point to a buffer in which the compressed data is to be placed, with }{\f2\cgrid0 avail_out}{\f11\cgrid0  indicating how much output space is available.  }{\f2\cgrid0 bzCompress}{\f11\cgrid0  updates }
{\f2\cgrid0 next_out}{\f11\cgrid0 , }{\f2\cgrid0 avail_out}{\f11\cgrid0  and }{\f2\cgrid0 total_out}{\f11\cgrid0  to reflect the number of bytes output.
\par 
\par You may provide and remove as little or as much data as you like on each call of }{\f2\cgrid0 bzCompress}{\f11\cgrid0 .  In the limi
t, it is acceptable to supply and remove data one byte at a time, although this would be terribly inefficient.  You should always ensure that at least one byte of output space is available at each call.
\par 
\par A second purpose of }{\f2\cgrid0 bzCompress}{\f11\cgrid0  is to request a change of mode of the compressed stream.
\par 
\par Conceptually, a compressed stream can be in one of four states: IDLE, RUNNING, FLUSHING and FINISHING.  Before initialisation (}{\f2\cgrid0 bzCompressInit}{\f11\cgrid0 ) and after termination (}{\f2\cgrid0 bzCompressEnd}{\f11\cgrid0 
), a stream is regarded as IDLE.
\par 
\par Upon initialisation (}{\f2\cgrid0 bzCompressInit}{\f11\cgrid0 ), the stream is placed in the RUNNING state.  Subsequent calls to }{\f2\cgrid0 bzCompress}{\f11\cgrid0  should pass }{\f2\cgrid0 BZ_RUN}{\f11\cgrid0 
 as the requested action; other actions are illegal and will result in }{\f2\cgrid0 BZ_SEQUENCE_ERROR}{\f11\cgrid0 .
\par 
\par At some point, the calling program will have provided all the input data it wants to.  It will then want to finish up - in effect, asking the library to process any data it might have buffered internally.  In this state, }{\f2\cgrid0 bzCompress}{
\f11\cgrid0  will no longer attempt to read data from }{\f2\cgrid0 next_in}{\f11\cgrid0 , but it will want to write data to }{\f2\cgrid0 next_out}{\f11\cgrid0 
.  Because the output buffer supplied by the user can be arbitrarily small, the finishing-up operation cannot necessarily be done with a single call of }{\f2\cgrid0 bzCompress}{\f11\cgrid0 .
\par 
\par Instead, the calling program passes }{\f2\cgrid0 BZ_FINISH}{\f11\cgrid0  as an action to }{\f2\cgrid0 bzCompress}{\f11\cgrid0 .  This changes the stream's state to FINISHING.  Any remaining input (ie, }{\f2\cgrid0 next_in[0 .. avail_in-1]}{\f11\cgrid0 
) is compressed and transferred to the output buffer.  To do this, }{\f2\cgrid0 bzCompress}{\f11\cgrid0  must be called repeatedly until all the output has been consumed.  At that point, }{\f2\cgrid0 bzCompress}{\f11\cgrid0  returns }{\f2\cgrid0 
BZ_STREAM_END}{\f11\cgrid0 , and the stream's state is set back to IDLE.  }{\f2\cgrid0 bzCompressEnd}{\f11\cgrid0  should then be called.
\par 
\par Just to make sure the calling program does not cheat, the library makes a note of }{\f2\cgrid0 avail_in}{\f11\cgrid0  at the time of the first call to }{\f2\cgrid0 bzCompress}{\f11\cgrid0  which has }{\f2\cgrid0 BZ_FINISH}{\f11\cgrid0 
 as an action (ie, at the time the program has announced its intention to not supply any more input).  By comparing this value with that of }{\f2\cgrid0 avail_in}{\f11\cgrid0  over subsequent calls to }{\f2\cgrid0 bzCompress}{\f11\cgrid0 
, the library can detect any attempts to slip in more data to compress.  Any calls for which this is detected will return }{\f2\cgrid0 BZ_SEQUENCE_ERROR}{\f11\cgrid0 .  This indicates a programming mistake which should be corrected.
\par 
\par Instead of asking to finish, the calling program may ask }{\f2\cgrid0 bzCompress}{\f11\cgrid0  to take all the remaining 
input, compress it and terminate the current (Burrows-Wheeler) compression block.  This could be useful for error control purposes.  The mechanism is analogous to that for finishing: call }{\f2\cgrid0 bzCompress}{\f11\cgrid0  with an action of }{
\f2\cgrid0 BZ_FLUSH}{\f11\cgrid0 , remove output data, and persist with the }{\f2\cgrid0 BZ_FLUSH}{\f11\cgrid0  action until the value }{\f2\cgrid0 BZ_RUN_OK}{\f11\cgrid0  is returned.  As with finishing, }{\f2\cgrid0 bzCompress}{\f11\cgrid0 
 detects any attempt to provide more input data once the flush has begun.
\par 
\par Once the flush is complete, the stream returns to the normal RUNNING state.
\par 
\par This all s
ounds pretty complex, but isn't really.  Here's a table which shows which actions are allowable in each state, what action will be taken, what the next state is, and what the non-error return values are.  Note that you can't explicitly ask what state the 
stream is in, but nor do you need to - it can be inferred from the values returned by }{\f2\cgrid0 bzCompress}{\f11\cgrid0 .  
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0 IDLE/any\line       Illegal.  IDLE state only exists after bzCompressEnd or\line       before bzCompressInit.\line       Return value = BZ_SEQUENCE_ERROR\line \line RUNNING/BZ_RUN\line   
    Compress from next_in to next_out as much as possible.\line       Next state = RUNNING\line       Return value = BZ_RUN_OK\line \line RUNNING/BZ_FLUSH\line       Remember current value of avail_in.  Compress from next_in\line 
      to next_out as much as possible, but do not accept any more input.\line       Next state = FLUSHING\line       Return value = BZ_FLUSH_OK\line \line RUNNING/BZ_FINISH\line       Remember current value of avail_in.  Compress from next_in\line 
      to next_out as much as possible, but do not accept any more input.\line       Next state = FINISHING\line       Return value = BZ_FINISH_OK\line \line FLUSHING/BZ_FLUSH\line       Compress from next_in to next_out as much as possible,\line 
      but do not accept any more input.\line       If all the existing input has been used up\line          Next state = RUNNING; Return value = BZ_RUN_OK\line       else\line          Next state = FLUSHING; Return value = BZ_FLUSH_OK\line \line 
FLUSHING/other\line       Illegal.\line       Return value = BZ_SEQUENCE_ERROR\line \line FINISHING/BZ_FINISH\line       Compress from next_in to next_out as much as possible,\line       but do not accept any more input.\line 
      If all the existing input has been used up and all compressed\line       output has been removed\line          Next state = IDLE; Return value = BZ_STREAM_END\line       else\line          Next state = FINISHING; Return value = BZ_FINISH_OK\line 
\line FINISHING/other\line       Illegal.\line       Return value = BZ_SEQUENCE_ERROR
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par That still looks complicated?  Well, fair enough.  The usual sequence of calls for compressing a load of data is: 
\par }\pard \fi-720\li720\nowidctlpar\tx144\tx720\adjustright {\f11\cgrid0 \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab Get started with }{\f2\cgrid0 bzCompressInit}{\f11\cgrid0 .
\par 
\par \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab Shovel data in and shlurp out its compressed form using zero or more calls of }{\f2\cgrid0 bzCompress}{\f11\cgrid0  with action = }{\f2\cgrid0 BZ_RUN}{\f11\cgrid0 .
\par 
\par \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab Finish up.  Repeatedly call }{\f2\cgrid0 bzCompress}{\f11\cgrid0  with action = }{\f2\cgrid0 BZ_FINISH}{\f11\cgrid0 , copying out the compressed output, until }{\f2\cgrid0 BZ_STREAM_END}{\f11\cgrid0  is returned.

\par 
\par \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab Close up and go home.  Call }{\f2\cgrid0 bzCompressEnd}{\f11\cgrid0 .
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 If the data you want to compress fits into your input buffer all at once, you can skip the calls of }{\f2\cgrid0 bzCompress ( ..., BZ_RUN )}{\f11\cgrid0  and just do the }{\f2\cgrid0 
bzCompress ( ..., BZ_FINISH )}{\f11\cgrid0  calls.
\par 
\par All required memory is allocated by }{\f2\cgrid0 bzCompressInit}{\f11\cgrid0 .  The compression library can accept any data at all (obviously).  So you shouldn't get any error return values from the }{\f2\cgrid0 bzCompress}{\f11\cgrid0 
 calls.  If you do, they will be }{\f2\cgrid0 BZ_SEQUENCE_ERROR}{\f11\cgrid0 , and indicate a bug in your programming.
\par 
\par Trivial other possible return values: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       BZ_PARAM_ERROR\line          if strm is NULL, or strm->state is NULL
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f2\cgrid0 bzCompressEnd}{\f11\cgrid0 
\par 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0 int bzCompressEnd ( bz_stream *strm );
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Releases all memory associated with a compression stream.
\par 
\par Possible return values: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0    BZ_PARAM_ERROR    if strm is NULL or strm->state is NULL\line    BZ_OK    otherwise
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f2\cgrid0 bzDecompressInit}{\f11\cgrid0 
\par 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0 int bzDecompressInit ( bz_stream *strm, int verbosity, int small );
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Prepares for decompression.  As with }{\f2\cgrid0 bzCompressInit}{\f11\cgrid0 , a }{\f2\cgrid0 bz_stream}{\f11\cgrid0  record should be allocated and initialised before the call.  Fields }{\f2\cgrid0 bzalloc}{
\f11\cgrid0 , }{\f2\cgrid0 bzfree}{\f11\cgrid0  and }{\f2\cgrid0 opaque}{\f11\cgrid0  should be set if a custom memory allocator is required, or made }{\f2\cgrid0 NULL}{\f11\cgrid0  for the normal }{\f2\cgrid0 malloc}{\f11\cgrid0 /}{\f2\cgrid0 free}{
\f11\cgrid0  routines.  Upon return, the internal state will have been initialised, and }{\f2\cgrid0 total_in}{\f11\cgrid0  and }{\f2\cgrid0 total_out}{\f11\cgrid0  will be zero.
\par 
\par For the meaning of parameter }{\f2\cgrid0 verbosity}{\f11\cgrid0 , see }{\f2\cgrid0 bzCompressInit}{\f11\cgrid0 .
\par 
\par If }{\f2\cgrid0 small}{\f11\cgrid0 
 is nonzero, the library will use an alternative decompression algorithm which uses less memory but at the cost of decompressing more slowly (roughly speaking, half the speed, but the maximum memory requirement drops to around 2300k).  See Chapter 2 for m
ore information on memory management.
\par 
\par Note that the amount of memory needed to decompress a stream cannot be determined until the stream's header has been read, so even if }{\f2\cgrid0 bzDecompressInit}{\f11\cgrid0  succeeds, a subsequent }{\f2\cgrid0 bzDecompress}{\f11\cgrid0 
 could fail with }{\f2\cgrid0 BZ_MEM_ERROR}{\f11\cgrid0 .
\par 
\par Possible return values: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       BZ_PARAM_ERROR\line          if (small != 0 && small != 1)\line          or (verbosity < 0 || verbosity > 4)\line       BZ_MEM_ERROR\line          if insufficient memory is available
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par Allowable next actions: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       bzDecompress\line          if BZ_OK was returned\line       no specific action required in case of error
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f2\cgrid0 bzDecompress}{\f11\cgrid0 
\par 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0 int bzDecompress ( bz_stream *strm );
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Provides more input and/or output buffer space for the library.  The caller maintains input and output buffers, and uses }{\f2\cgrid0 bzDecompress}{\f11\cgrid0  to transfer data between them.
\par 
\par Before each call to }{\f2\cgrid0 bzDecompress}{\f11\cgrid0 , }{\f2\cgrid0 next_in}{\f11\cgrid0  should point at the compressed data, and }{\f2\cgrid0 avail_in}{\f11\cgrid0  should indicate how many bytes the library may read.  }{\f2\cgrid0 bzDecompress}{
\f11\cgrid0  updates }{\f2\cgrid0 next_in}{\f11\cgrid0 , }{\f2\cgrid0 avail_in}{\f11\cgrid0  and }{\f2\cgrid0 total_in}{\f11\cgrid0  to reflect the number of bytes it has read.
\par 
\par Similarly, }{\f2\cgrid0 next_out}{\f11\cgrid0  should point to a buffer in which the uncompressed output is to be placed, with }{\f2\cgrid0 avail_out}{\f11\cgrid0  indicating how much output space is available.  }{\f2\cgrid0 bzDecompress}{\f11\cgrid0 
 updates }{\f2\cgrid0 next_out}{\f11\cgrid0 , }{\f2\cgrid0 avail_out}{\f11\cgrid0  and }{\f2\cgrid0 total_out}{\f11\cgrid0  to reflect the number of bytes output.
\par 
\par You may provide and remove as little or as much data as you like on each call of }{\f2\cgrid0 bzDecompress}{\f11\cgrid0 .  In the limit, it is acceptable to supply and remove data one byte at a time, although this would be terribly inefficien
t.  You should always ensure that at least one byte of output space is available at each call.
\par 
\par Use of }{\f2\cgrid0 bzDecompress}{\f11\cgrid0  is simpler than }{\f2\cgrid0 bzCompress}{\f11\cgrid0 .
\par 
\par You should provide input and remove output as described above, and repeatedly call }{\f2\cgrid0 bzDecompress}{\f11\cgrid0  until }{\f2\cgrid0 BZ_STREAM_END}{\f11\cgrid0  is returned.  Appearance of }{\f2\cgrid0 BZ_STREAM_END}{\f11\cgrid0  denotes that }{
\f2\cgrid0 bzDecompress}{\f11\cgrid0  has detected the logical end of the compressed stream.  }{\f2\cgrid0 bzDecompress}{\f11\cgrid0  will not produce }{\f2\cgrid0 BZ_STREAM_END}{\f11\cgrid0 
 until all output data has been placed into the output buffer, so once }{\f2\cgrid0 BZ_STREAM_END}{\f11\cgrid0  appears, you are guaranteed to have available all the decompressed output, and }{\f2\cgrid0 bzDecompressEnd}{\f11\cgrid0  can safely be called.

\par 
\par In case of an error return value, you should call }{\f2\cgrid0 bzDecompressEnd}{\f11\cgrid0  to clean up and release memory.
\par 
\par Possible return values: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       BZ_PARAM_ERROR\line          if strm is NULL or strm->state is NULL\line          or strm->avail_out < 1\line       BZ_DATA_ERROR\line 
         if a data integrity error is detected in the compressed stream\line       BZ_DATA_ERROR_MAGIC\line          if the compressed stream doesn't begin with the right magic bytes\line       BZ_MEM_ERROR\line 
         if there wasn't enough memory available\line       BZ_STREAM_END\line          if the logical end of the data stream was detected and all\line          output has been consumed, eg strm->avail_out > 0\line       BZ_OK\line          otherwise

\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Allowable next actions: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       bzDecompress\line          if BZ_OK was returned\line       bzDecompressEnd\line          otherwise
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f2\cgrid0 bzDecompressEnd}{\f11\cgrid0 
\par 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0 int bzDecompressEnd ( bz_stream *strm );
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Releases all memory associated with a decompression stream.
\par 
\par Possible return values: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       BZ_PARAM_ERROR\line          if strm is NULL or strm->state is NULL\line       BZ_OK\line          otherwise
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par Allowable next actions: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       None.
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f11\cgrid0 High-level interface}{\f11\cgrid0 
\par 
\par This interface provides functions for reading and writing }{\f2\cgrid0 bzip2}{\f11\cgrid0  format files.  First, some general points.
\par 
\par }\pard \fi-720\li720\nowidctlpar\tx144\tx720\adjustright {\f11\cgrid0 \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab All of the functions take an }{\f2\cgrid0 int*}{\f11\cgrid0  first argument,   }{\f2\cgrid0 bzerror}{\f11\cgrid0 .    After each call, }{
\f2\cgrid0 bzerror}{\f11\cgrid0  should be consulted first to determine   the outcome of the call.  If }{\f2\cgrid0 bzerror}{\f11\cgrid0  is }{\f2\cgrid0 BZ_OK}{\f11\cgrid0 ,   the call completed   successfully, and only
 then should the return value of the function   (if any) be consulted.  If }{\f2\cgrid0 bzerror}{\f11\cgrid0  is }{\f2\cgrid0 BZ_IO_ERROR}{\f11\cgrid0 ,   there was an error   reading/writing the underlying compressed file, and you should   then consult }
{\f2\cgrid0 errno}{\f11\cgrid0 /}{\f2\cgrid0 perror}{\f11\cgrid0  to determine the   cause of the difficulty.    }{\f2\cgrid0 bzerror}{\f11\cgrid0  may also be set to various other values; precise details are   given on a per-function basis below.
\par 
\par \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab If }{\f2\cgrid0 bzerror}{\f11\cgrid0  indicates an error   (ie, anything except }{\f2\cgrid0 BZ_OK}{\f11\cgrid0  and }{\f2\cgrid0 BZ_STREAM_END}{\f11\cgrid0 ),   you should immediately call }{\f2\cgrid0 bzReadClose
}{\f11\cgrid0  (or }{\f2\cgrid0 bzWriteClose}{\f11\cgrid0 ,   depending on whether you are attempting to read or to write)   to free up all resources associated   with the stream.  Once an error has been indicated, behaviour of all calls   except }{
\f2\cgrid0 bzReadClose}{\f11\cgrid0  (}{\f2\cgrid0 bzWriteClose}{\f11\cgrid0 ) is undefined.    The implication is that (1) }{\f2\cgrid0 bzerror}{\f11\cgrid0  should   be checked after each call, and (2) if }{\f2\cgrid0 bzerror}{\f11\cgrid0 
 indicates an error,   }{\f2\cgrid0 bzReadClose}{\f11\cgrid0  (}{\f2\cgrid0 bzWriteClose}{\f11\cgrid0 ) should then be called to clean up.
\par 
\par \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab The }{\f2\cgrid0 FILE*}{\f11\cgrid0  arguments passed to    }{\f2\cgrid0 bzReadOpen}{\f11\cgrid0 /}{\f2\cgrid0 bzWriteOpen}{\f11\cgrid0    
should be set to binary mode.    Most Unix systems will do this by default, but other platforms,   including Windows and Mac, will not.  If you omit this, you may   encounter problems when moving code to new platforms.
\par 
\par \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab Memory allocation requests are handled by   }{\f2\cgrid0 malloc}{\f11\cgrid0 /}{\f2\cgrid0 free}{\f11\cgrid0 
.    At present   there is no facility for user-defined memory allocators in the file I/O   functions (could easily be added, though).
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f2\cgrid0 bzReadOpen}{\f11\cgrid0 
\par 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0    typedef void BZFILE;\line \line    BZFILE *bzReadOpen ( int *bzerror, FILE *f,\line                         int small, int verbosity,\line                         void *unused, int nUnused );

\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Prepare to read compressed data from file handle }{\f2\cgrid0 f}{\f11\cgrid0 .  }{\f2\cgrid0 f}{\f11\cgrid0  should refer to a file which has been opened for reading, and for which the error indicator (}{
\f2\cgrid0 ferror(f)}{\f11\cgrid0 ) is not set.  If }{\f2\cgrid0 small}{\f11\cgrid0  is 1, the library will try to decompress using less memory, at the expense of speed.
\par 
\par For reasons explained below, }{\f2\cgrid0 bzRead}{\f11\cgrid0  will decompress the }{\f2\cgrid0 nUnused}{\f11\cgrid0  bytes starting at }{\f2\cgrid0 unused}{\f11\cgrid0 , before starting to read from the file }{\f2\cgrid0 f}{\f11\cgrid0 .  At most }{
\f2\cgrid0 BZ_MAX_UNUSED}{\f11\cgrid0  bytes may be supplied like this.  If this facility is not required, you should pass }{\f2\cgrid0 NULL}{\f11\cgrid0  and }{\f2\cgrid0 0}{\f11\cgrid0  for }{\f2\cgrid0 unused}{\f11\cgrid0  and }{\f2\cgrid0 nUnused}{
\f11\cgrid0  respectively.
\par 
\par For the meaning of parameters }{\f2\cgrid0 small}{\f11\cgrid0  and }{\f2\cgrid0 verbosity}{\f11\cgrid0 , see }{\f2\cgrid0 bzDecompressInit}{\f11\cgrid0 .
\par 
\par The amount of memory needed to decompress a file cannot be determined until the file's header has been read.  So it is possible that }{\f2\cgrid0 bzReadOpen}{\f11\cgrid0  returns }{\f2\cgrid0 BZ_OK}{\f11\cgrid0  but a subsequent call of }{\f2\cgrid0 
bzRead}{\f11\cgrid0  will return }{\f2\cgrid0 BZ_MEM_ERROR}{\f11\cgrid0 .
\par 
\par Possible assignments to }{\f2\cgrid0 bzerror}{\f11\cgrid0 : 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       BZ_PARAM_ERROR\line          if f is NULL\line          or small is neither 0 nor 1\line          or (unused == NULL && nUnused != 0)\line 
         or (unused != NULL && !(0 <= nUnused <= BZ_MAX_UNUSED))\line       BZ_IO_ERROR\line          if ferror(f) is nonzero\line       BZ_MEM_ERROR\line          if insufficient memory is available\line       BZ_OK\line          otherwise.
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par Possible return values: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       Pointer to an abstract BZFILE\line          if bzerror is BZ_OK\line       NULL\line          otherwise
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par Allowable next actions: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       bzRead\line          if bzerror is BZ_OK\line       bzReadClose\line          otherwise
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f2\cgrid0 bzRead}{\f11\cgrid0 
\par 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0    int bzRead ( int *bzerror, BZFILE *b, void *buf, int len );
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Reads up to }{\f2\cgrid0 len}{\f11\cgrid0  (uncompressed) bytes from the compressed file }{\f2\cgrid0 b}{\f11\cgrid0  into the buffer }{\f2\cgrid0 buf}{\f11\cgrid0 .  If the read was successful, }{\f2\cgrid0 
bzerror}{\f11\cgrid0  is set to }{\f2\cgrid0 BZ_OK}{\f11\cgrid0  and the number of bytes read is returned.  If the logical end-of-stream was detected, }{\f2\cgrid0 bzerror}{\f11\cgrid0  will be set to }{\f2\cgrid0 BZ_STREAM_END}{\f11\cgrid0 
, and the number of bytes read is returned.  All other }{\f2\cgrid0 bzerror}{\f11\cgrid0  values denote an error.
\par 
\par }{\f2\cgrid0 bzRead}{\f11\cgrid0  will supply }{\f2\cgrid0 len}{\f11\cgrid0  bytes, unless the logical stream end is detected or an error occu
rs.  Because of this, it is possible to detect the stream end by observing when the number of bytes returned is less than the number requested.  Nevertheless, this is regarded as inadvisable; you should instead check }{\f2\cgrid0 bzerror}{\f11\cgrid0 
 after every call and watch out for }{\f2\cgrid0 BZ_STREAM_END}{\f11\cgrid0 .
\par 
\par Internally, }{\f2\cgrid0 bzRead}{\f11\cgrid0  copies data from the compressed file in chunks of size }{\f2\cgrid0 BZ_MAX_UNUSED}{\f11\cgrid0 
 bytes before decompressing it.  If the file contains more bytes than strictly needed to reach the logical end-of-stream, }{\f2\cgrid0 bzRead}{\f11\cgrid0  will almost certainly read some of the trailing data before signalling }{\f2\cgrid0 BZ_STREAM_END
}{\f11\cgrid0 .  To collect the read but unused data once }{\f2\cgrid0 BZ_STREAM_END}{\f11\cgrid0  has appeared, call }{\f2\cgrid0 bzReadGetUnused}{\f11\cgrid0  immediately before }{\f2\cgrid0 bzReadClose}{\f11\cgrid0 .
\par 
\par Possible assignments to }{\f2\cgrid0 bzerror}{\f11\cgrid0 : 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       BZ_PARAM_ERROR\line          if b is NULL or buf is NULL or len < 0\line       BZ_SEQUENCE_ERROR\line          if b was opened with bzWriteOpen\line       BZ_IO_ERROR\line 
         if there is an error reading from the compressed file\line       BZ_UNEXPECTED_EOF\line          if the compressed file ended before the logical end-of-stream was detected\line       BZ_DATA_ERROR\line 
         if a data integrity error was detected in the compressed stream\line       BZ_DATA_ERROR_MAGIC\line          if the stream does not begin with the requisite header bytes (ie, is not\line         
 a bzip2 data file).  This is really a special case of BZ_DATA_ERROR.\line       BZ_MEM_ERROR\line          if insufficient memory was available\line       BZ_STREAM_END\line          if the logical end of stream was detected.\line       BZ_OK\line 
         otherwise.
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par Possible return values: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       number of bytes read\line          if bzerror is BZ_OK or BZ_STREAM_END\line       undefined\line          otherwise
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par Allowable next actions: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       collect data from buf, then bzRead or bzReadClose\line          if bzerror is BZ_OK\line       collect data from buf, then bzReadClose or bzReadGetUnused\line 
         if bzerror is BZ_STREAM_END\line       bzReadClose\line          otherwise
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f2\cgrid0 bzReadGetUnused}{\f11\cgrid0 
\par 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0    void bzReadGetUnused ( int* bzerror, BZFILE *b,\line                           void** unused, int* nUnused );
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Returns data which was read from the compressed file but was not needed to get to the logical end-of-stream.  }{\f2\cgrid0 *unused}{\f11\cgrid0  is set to the address of the data, and }{\f2\cgrid0 *nUnused}{
\f11\cgrid0  to the number of bytes.  }{\f2\cgrid0 *nUnused}{\f11\cgrid0  will be set to a value between }{\f2\cgrid0 0}{\f11\cgrid0  and }{\f2\cgrid0 BZ_MAX_UNUSED}{\f11\cgrid0  inclusive.
\par 
\par This function may only be called once }{\f2\cgrid0 bzRead}{\f11\cgrid0  has signalled }{\f2\cgrid0 BZ_STREAM_END}{\f11\cgrid0  but before }{\f2\cgrid0 bzReadClose}{\f11\cgrid0 .
\par 
\par Possible assignments to }{\f2\cgrid0 bzerror}{\f11\cgrid0 : 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       BZ_PARAM_ERROR\line          if b is NULL\line          or unused is NULL or nUnused is NULL\line       BZ_SEQUENCE_ERROR\line          if BZ_STREAM_END has not been signalled\line 
         or if b was opened with bzWriteOpen\line       BZ_OK\line          otherwise
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par Allowable next actions: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       bzReadClose
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f2\cgrid0 bzReadClose}{\f11\cgrid0 
\par 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0    void bzReadClose ( int *bzerror, BZFILE *b );
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Releases all memory pertaining to the compressed file }{\f2\cgrid0 b}{\f11\cgrid0 .  }{\f2\cgrid0 bzReadClose}{\f11\cgrid0  does not call }{\f2\cgrid0 fclose}{\f11\cgrid0 
 on the underlying file handle, so you should do that yourself if appropriate.  }{\f2\cgrid0 bzReadClose}{\f11\cgrid0  should be called to clean up after all error situations.
\par 
\par Possible assignments to }{\f2\cgrid0 bzerror}{\f11\cgrid0 : 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       BZ_SEQUENCE_ERROR\line          if b was opened with bzWriteOpen\line       BZ_OK\line          otherwise
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par Allowable next actions: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       none
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f2\cgrid0 bzWriteOpen}{\f11\cgrid0 
\par 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0    BZFILE *bzWriteOpen ( int *bzerror, FILE *f,\line                          int blockSize100k, int verbosity,\line                          int workFactor );
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Prepare to write compressed data to file handle }{\f2\cgrid0 f}{\f11\cgrid0 .  }{\f2\cgrid0 f}{\f11\cgrid0  should refer to a file which has been opened for writing, and for which the error indicator (}{
\f2\cgrid0 ferror(f)}{\f11\cgrid0 ) is not set.
\par 
\par For the meaning of parameters }{\f2\cgrid0 blockSize100k}{\f11\cgrid0 , }{\f2\cgrid0 verbosity}{\f11\cgrid0  and }{\f2\cgrid0 workFactor}{\f11\cgrid0 , see }{\f2\cgrid0 bzCompressInit}{\f11\cgrid0 .
\par 
\par All required memory is allocated at this stage, so if the call completes successfully, }{\f2\cgrid0 BZ_MEM_ERROR}{\f11\cgrid0  cannot be signalled by a subsequent call to }{\f2\cgrid0 bzWrite}{\f11\cgrid0 .
\par 
\par Possible assignments to }{\f2\cgrid0 bzerror}{\f11\cgrid0 : 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       BZ_PARAM_ERROR\line          if f is NULL\line          or blockSize100k < 1 or blockSize100k > 9\line       BZ_IO_ERROR\line          if ferror(f) is nonzero\line       BZ_MEM_ERROR\line 
         if insufficient memory is available\line       BZ_OK\line          otherwise
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par Possible return values: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       Pointer to an abstract BZFILE\line          if bzerror is BZ_OK\line       NULL\line          otherwise
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par Allowable next actions: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       bzWrite\line          if bzerror is BZ_OK\line          (you could go directly to bzWriteClose, but this would be pretty pointless)\line       bzWriteClose\line          otherwise
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f2\cgrid0 bzWrite}{\f11\cgrid0 
\par 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0    void bzWrite ( int *bzerror, BZFILE *b, void *buf, int len );
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Absorbs }{\f2\cgrid0 len}{\f11\cgrid0  bytes from the buffer }{\f2\cgrid0 buf}{\f11\cgrid0 , eventually to be compressed and written to the file.
\par 
\par Possible assignments to }{\f2\cgrid0 bzerror}{\f11\cgrid0 : 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       BZ_PARAM_ERROR\line          if b is NULL or buf is NULL or len < 0\line       BZ_SEQUENCE_ERROR\line          if b was opened with bzReadOpen\line       BZ_IO_ERROR\line 
         if there is an error writing the compressed file.\line       BZ_OK\line          otherwise
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f2\cgrid0 bzWriteClose}{\f11\cgrid0 
\par 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0    void bzWriteClose ( int *bzerror, BZFILE* b,\line                        int abandon,\line                        unsigned int* nbytes_in,\line                        unsigned int* nbytes_out );
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par Compresses and flushes to the compressed file all data so far supplied by }{\f2\cgrid0 bzWrite}{\f11\cgrid0 .  The logical end-of-stream markers are also written, so subsequent calls to }{\f2\cgrid0 bzWrite}{\f11\cgrid0 
 are illegal.  All memory associated with the compressed file }{\f2\cgrid0 b}{\f11\cgrid0  is released.  }{\f2\cgrid0 fflush}{\f11\cgrid0  is called on the compressed file, but it is not }{\f2\cgrid0 fclose}{\f11\cgrid0 'd.
\par 
\par If }{\f2\cgrid0 bzWriteClose}{\f11\cgrid0  is called to clean up after an error, the only action is to re
lease the memory.  The library records the error codes issued by previous calls, so this situation will be detected automatically.  There is no attempt to complete the compression operation, nor to }{\f2\cgrid0 fflush}{\f11\cgrid0 
 the compressed file.  You can force this behaviour to happen even in the case of no error, by passing a nonzero value to }{\f2\cgrid0 abandon}{\f11\cgrid0 .
\par 
\par If }{\f2\cgrid0 nbytes_in}{\f11\cgrid0  is non-null, }{\f2\cgrid0 *nbytes_in}{\f11\cgrid0  will be set to the total volume of uncompressed data handled.  Similarly, if }{\f2\cgrid0 nbytes_out}{\f11\cgrid0  is non-null, }{\f2\cgrid0 *nbytes_out}{\f11\cgrid0  will be set to the total volume of compressed data written.
\par 
\par Possible assignments to }{\f2\cgrid0 bzerror}{\f11\cgrid0 : 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       BZ_SEQUENCE_ERROR\line          if b was opened with bzReadOpen\line       BZ_IO_ERROR\line          if there is an error writing the compressed file\line       BZ_OK\line          otherwise

\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f11\cgrid0 Handling embedded compressed data streams}{\f11\cgrid0 
\par 
\par The high-level library facilitates use of }{\f2\cgrid0 bzip2}{\f11\cgrid0  data streams which form some part of a surrounding, larger data stream.  
\par }\pard \fi-720\li720\nowidctlpar\tx144\tx720\adjustright {\f11\cgrid0 \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab For writing, the library takes an open file handle, writes compressed data to it, }{\f2\cgrid0 fflush}{\f11\cgrid0 es it but does not }{
\f2\cgrid0 fclose}{\f11\cgrid0  it.  The calling application can write its own data before and after the compressed data stream, using that same file handle.
\par 
\par \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab Reading is more complex, and the facilities are not as general as they could be since generality is hard to reconcile with efficiency.  }{\f2\cgrid0 bzRead}{\f11\cgrid0 
 reads from the compressed file in blocks of size }{\f2\cgrid0 BZ_MAX_UNUSED}{\f11\cgrid0  bytes, and in doing so probably will overshoot the logical end of compressed stream.  To recover this data once decompression has ended, call }{\f2\cgrid0 
bzReadGetUnused}{\f11\cgrid0  after the last call of }{\f2\cgrid0 bzRead}{\f11\cgrid0  (the one returning }{\f2\cgrid0 BZ_STREAM_END}{\f11\cgrid0 ) but before calling }{\f2\cgrid0 bzReadClose}{\f11\cgrid0 .
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par This mechanism makes it easy to decompress multiple }{\f2\cgrid0 bzip2}{\f11\cgrid0  streams placed end-to-end.  At the end of one stream, when }{\f2\cgrid0 bzRead}{\f11\cgrid0  returns }{\f2\cgrid0 BZ_STREAM_END}{\f11\cgrid0 , call }{\f2\cgrid0 
bzReadGetUnused}{\f11\cgrid0  to collect the unused data (copy it into your own buffer somewhere).  That data forms the start of the next compressed stream.  To start uncompressing that next stream, call }{\f2\cgrid0 bzReadOpen}{\f11\cgrid0 
 again, feeding in the unused data via the }{\f2\cgrid0 unused}{\f11\cgrid0 /}{\f2\cgrid0 nUnused}{\f11\cgrid0  parameters.  Keep doing this until }{\f2\cgrid0 BZ_STREAM_END}{\f11\cgrid0  return coincides with the physical end of file (}{\f2\cgrid0 
feof(f)}{\f11\cgrid0 ).  In this situation }{\f2\cgrid0 bzReadGetUnused}{\f11\cgrid0  will of course return no data.
\par 
\par This should give some feel for how the high-level interface can be used.  If you require extra flexibility, you'll have to bite the bullet and get to grips with the low-level interface.
\par 
\par }{\b\f11\cgrid0 Standard file-reading/writing code}{\f11\cgrid0 
\par 
\par Here's how you'd write data to a compressed file: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0 FILE*   f;\line BZFILE* b;\line int     nBuf;\line char    buf[ /* whatever size you like */ ];\line int     bzerror;\line \line f = fopen ( "myfile.bz2", "wb" );\line if (!f) \{
\line    /* handle error */\line \}\line b = bzWriteOpen ( &bzerror, f, 9, 0, 30 );\line if (bzerror != BZ_OK) \{\line    bzWriteClose ( &bzerror, b, 0, NULL, NULL );\line    /* handle error */\line \}\line \line while ( /* condition */ ) \{\line 
   /* get data to write into buf, and set nBuf appropriately */\line    bzWrite ( &bzerror, b, buf, nBuf );\line    if (bzerror == BZ_IO_ERROR) \{\line       bzWriteClose ( &bzerror, b, 0, NULL, NULL );\line       /* handle error */\line    \}\line \}\line 
\line bzWriteClose ( &bzerror, b, 0, NULL, NULL );\line if (bzerror == BZ_IO_ERROR) \{\line    /* handle error */\line \}
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 And to read from a compressed file: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0 FILE*   f;\line BZFILE* b;\line int     nBuf;\line char    buf[ /* whatever size you like */ ];\line int     bzerror;\line \line f = fopen ( "myfile.bz2", "rb" );\line if (!f) \{
\line    /* handle error */\line \}\line b = bzReadOpen ( &bzerror, f, 0, 0, NULL, 0 );\line if (bzerror != BZ_OK) \{\line    bzReadClose ( &bzerror, b );\line    /* handle error */\line \}\line \line bzerror = BZ_OK;\line while (bzerror == BZ_OK
 && /* arbitrary other conditions */) \{\line    nBuf = bzRead ( &bzerror, b, buf, /* size of buf */ );\line    if (bzerror == BZ_OK || bzerror == BZ_STREAM_END) \{\line       /* do something with buf[0 .. nBuf-1] */\line    \}\line \}\line if (bzerror != BZ_STREAM_END) \{\line 
   bzReadClose ( &bzerror, b );\line    /* handle error */\line \} else \{\line    bzReadClose ( &bzerror, b );\line \}
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f11\cgrid0 Utility functions}{\f11\cgrid0 
\par 
\par }{\b\f2\cgrid0 bzBuffToBuffCompress}{\f11\cgrid0 
\par 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0    int bzBuffToBuffCompress( char*         dest,\line                              unsigned int* destLen,\line                              char*         source,\line              
                unsigned int  sourceLen,\line                              int           blockSize100k,\line                              int           verbosity,\line                              int           workFactor );
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Attempts to compress the data in }{\f2\cgrid0 source[0 .. sourceLen-1]}{\f11\cgrid0  into the destination buffer, }{\f2\cgrid0 dest[0 .. *destLen-1]}{\f11\cgrid0 .  If the destination buffer is big enough, }{
\f2\cgrid0 *destLen}{\f11\cgrid0  is set to the size of the compressed data, and }{\f2\cgrid0 BZ_OK}{\f11\cgrid0  is returned.  If the compressed data won't fit, }{\f2\cgrid0 *destLen}{\f11\cgrid0  is unchanged, and }{\f2\cgrid0 BZ_OUTBUFF_FULL}{
\f11\cgrid0  is returned.
\par 
\par Compression in this manner is a one-shot event, done with a single call to this function.  The resulting compressed data is a complete }{\f2\cgrid0 bzip2}{\f11\cgrid0 
 format data stream.  There is no mechanism for making additional calls to provide extra input data.  If you want that kind of mechanism, use the low-level interface.
\par 
\par For the meaning of parameters }{\f2\cgrid0 blockSize100k}{\f11\cgrid0 , }{\f2\cgrid0 verbosity}{\f11\cgrid0  and }{\f2\cgrid0 workFactor}{\f11\cgrid0 , see }{\f2\cgrid0 bzCompressInit}{\f11\cgrid0 .
\par 
\par To guarantee that the compressed data will fit in its buffer, allocate an output buffer of size 1% larger than the uncompressed data, plus fifty bytes.
\par 
\par }{\f2\cgrid0 bzBuffToBuffCompress}{\f11\cgrid0  will not write data at or beyond }{\f2\cgrid0 dest[*destLen]}{\f11\cgrid0 , even in case of buffer overflow.
\par 
\par Possible return values: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       BZ_PARAM_ERROR\line          if dest is NULL or destLen is NULL\line          or blockSize100k < 1 or blockSize100k > 9\line          or verbosity < 0 or verbosity > 4\line 
         or workFactor < 0 or workFactor > 250\line       BZ_MEM_ERROR\line          if insufficient memory is available\line       BZ_OUTBUFF_FULL\line          if the size of the compressed data exceeds *destLen\line       BZ_OK\line          otherwise

\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f2\cgrid0 bzBuffToBuffDecompress}{\f11\cgrid0 
\par 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0    int bzBuffToBuffDecompress ( char*         dest,\line                                 unsigned int* destLen,\line                                 char*         source,\line 
                                unsigned int  sourceLen,\line                                 int           small,\line                                 int           verbosity );
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Attempts to decompress the data in }{\f2\cgrid0 source[0 .. sourceLen-1]}{\f11\cgrid0  into the destination buffer, }{\f2\cgrid0 dest[0 .. *destLen-1]}{\f11\cgrid0 .  If the destination buffer is big enough, }{
\f2\cgrid0 *destLen}{\f11\cgrid0  is set to the size of the uncompressed data, and }{\f2\cgrid0 BZ_OK}{\f11\cgrid0  is returned.  If the uncompressed data won't fit, }{\f2\cgrid0 *destLen}{\f11\cgrid0  is unchanged, and }{\f2\cgrid0 BZ_OUTBUFF_FULL}{
\f11\cgrid0  is returned.
\par 
\par }{\f2\cgrid0 source}{\f11\cgrid0  is assumed to hold a complete }{\f2\cgrid0 bzip2}{\f11\cgrid0  format data stream.  }{\f2\cgrid0 bzBuffToBuffDecompress}{\f11\cgrid0  tries to decompress the entirety of the stream into the output buffer.
\par 
\par For the meaning of parameters }{\f2\cgrid0 small}{\f11\cgrid0  and }{\f2\cgrid0 verbosity}{\f11\cgrid0 , see }{\f2\cgrid0 bzDecompressInit}{\f11\cgrid0 .
\par 
\par Because the compression ratio of the compre
ssed data cannot be known in advance, there is no easy way to guarantee that the output buffer will be big enough.  You may of course make arrangements in your code to record the size of the uncompressed data, but such a mechanism is beyond the scope of t
his library.
\par 
\par }{\f2\cgrid0 bzBuffToBuffDecompress}{\f11\cgrid0  will not write data at or beyond }{\f2\cgrid0 dest[*destLen]}{\f11\cgrid0 , even in case of buffer overflow.
\par 
\par Possible return values: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0       BZ_PARAM_ERROR\line          if dest is NULL or destLen is NULL\line          or small != 0 && small != 1\line          or verbosity < 0 or verbosity > 4\line       BZ_MEM_ERROR\line 
         if insufficient memory is available\line       BZ_OUTBUFF_FULL\line          if the size of the uncompressed data exceeds *destLen\line       BZ_DATA_ERROR\line          if a data integrity error was detected in the compressed data\line 
      BZ_DATA_ERROR_MAGIC\line          if the compressed data doesn't begin with the right magic bytes\line       BZ_UNEXPECTED_EOF\line          if the compressed data ends unexpectedly\line       BZ_OK\line          otherwise
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par }{\b\f11\cgrid0 Using the library in a }{\b\f2\cgrid0 stdio}{\b\f11\cgrid0 -free environment}{\f11\cgrid0 
\par 
\par }{\b\f11\cgrid0 Getting rid of }{\b\f2\cgrid0 stdio}{\f11\cgrid0 
\par 
\par In a deeply embedded application, you might want to use just the memory-to-memory functions.  You can do this conveniently by compiling the library with preprocessor symbol }{\f2\cgrid0 BZ_NO_STDIO}{\f11\cgrid0  defined.  Doing this gives you a libra
ry containing only the following eight functions:
\par 
\par }{\f2\cgrid0 bzCompressInit}{\f11\cgrid0 , }{\f2\cgrid0 bzCompress}{\f11\cgrid0 , }{\f2\cgrid0 bzCompressEnd}{\f11\cgrid0 
\par }{\f2\cgrid0 bzDecompressInit}{\f11\cgrid0 , }{\f2\cgrid0 bzDecompress}{\f11\cgrid0 , }{\f2\cgrid0 bzDecompressEnd}{\f11\cgrid0 
\par }{\f2\cgrid0 bzBuffToBuffCompress}{\f11\cgrid0 , }{\f2\cgrid0 bzBuffToBuffDecompress}{\f11\cgrid0 
\par 
\par When compiled like this, all functions will ignore }{\f2\cgrid0 verbosity}{\f11\cgrid0  settings.
\par 
\par }{\b\f11\cgrid0 Critical error handling}{\f11\cgrid0 
\par 
\par }{\f2\cgrid0 libbzip2}{\f11\cgrid0  contains a number of internal assertion checks which should, needless to say, never be activated.  Nevertheless, if an assertion should fail, behaviour depends on whether or not the library was compiled with }{
\f2\cgrid0 BZ_NO_STDIO}{\f11\cgrid0  set.
\par 
\par For a normal compile, an assertion failure yields the message 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0    bzip2/libbzip2, v0.9.0: internal error number N.\line    This is a bug in bzip2/libbzip2, v0.9.0.  Please report\line    it to me at: jseward@acm.org.  If this happened when\line 
   you were using some program which uses libbzip2 as a\line    component, you should also report this bug to the author(s)\line    of that program.  Please make an effort to report this bug;\line 
   timely and accurate bug reports eventually lead to higher\line    quality software.  Thx.  Julian Seward, 27 June 1998.
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 where }{\f2\cgrid0 N}{\f11\cgrid0  is some error code number.  }{\f2\cgrid0 exit(3)}{\f11\cgrid0  is then called.
\par 
\par For a }{\f2\cgrid0 stdio}{\f11\cgrid0 -free library, assertion failures result in a call to a function declared as: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0    extern void bz_internal_error ( int errcode );
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 The relevant code is passed as a parameter.  You should supply such a function.
\par 
\par In either case, once an assertion failure has occurred, any }{\f2\cgrid0 bz_stream}{\f11\cgrid0  records involved can be regarded as invalid.  You should not attempt to resume normal operation with them.
\par 
\par You may, of course, change 
critical error handling to suit your needs.  As I said above, critical errors indicate bugs in the library and should not occur.  All "normal" error situations are indicated via error return codes from functions, and can be recovered from.
\par 
\par }{\b\f11\cgrid0 Making a Windows DLL}{\f11\cgrid0 
\par 
\par Everything related to Windows has been contributed by Yoshioka Tsuneo
\par (}{\f2\cgrid0 QWF00133@niftyserve.or.jp}{\f11\cgrid0  / }{\f2\cgrid0 tsuneo-y@is.aist-nara.ac.jp}{\f11\cgrid0 ), so you should send your queries to him (but perhaps Cc: me, }{\f2\cgrid0 jseward@acm.org}{\f11\cgrid0 ).
\par 
\par My vague understanding of what to do is: using Visual C++ 5.0, open the project file }{\f2\cgrid0 libbz2.dsp}{\f11\cgrid0 , and build.  That's all.
\par 
\par If you can't open the project file for some reason, make a new one, naming these files: }{\f2\cgrid0 blocksort.c}{\f11\cgrid0 , }{\f2\cgrid0 bzlib.c}{\f11\cgrid0 , }{\f2\cgrid0 compress.c}{\f11\cgrid0 , }{\f2\cgrid0 crctable.c}{\f11\cgrid0 , }{\f2\cgrid0 
decompress.c}{\f11\cgrid0 , }{\f2\cgrid0 huffman.c}{\f11\cgrid0 ,
\par }{\f2\cgrid0 randtable.c}{\f11\cgrid0  and }{\f2\cgrid0 libbz2.def}{\f11\cgrid0 .  You might also need to name the header files }{\f2\cgrid0 bzlib.h}{\f11\cgrid0  and }{\f2\cgrid0 bzlib_private.h}{\f11\cgrid0 .
\par 
\par If you don't use VC++, you may need to define the preprocessor symbol }{\f2\cgrid0 _WIN32}{\f11\cgrid0 .
\par 
\par Finally, }{\f2\cgrid0 dlltest.c}{\f11\cgrid0  is a sample program using the DLL.  It has a project file, }{\f2\cgrid0 dlltest.dsp}{\f11\cgrid0 .
\par 
\par I haven't tried any of this stuff myself, but it all looks plausible.
\par 
\par }{\b\f11\cgrid0 Miscellanea}{\f11\cgrid0 
\par 
\par These are just some random thoughts of mine.  Your mileage may vary.
\par 
\par }{\b\f11\cgrid0 Limitations of the compressed file format}{\f11\cgrid0 
\par 
\par }{\f2\cgrid0 bzip2-0.9.0}{\f11\cgrid0  uses exactly the same file format as the previous version, }{\f2\cgrid0 bzip2-0.1}{\f11\cgrid0 
.  This decision was made in the interests of stability.  Creating yet another incompatible compressed file format would create further confusion and disruption for users.
\par 
\par Nevertheless, this is not a painless decision.  Development work since the release of }{\f2\cgrid0 bzip2-0.1}{\f11\cgrid0 
 in August 1997 has shown complexities in the file format which slow down decompression and, in retrospect, are unnecessary.  These are: 
\par }\pard \fi-720\li720\nowidctlpar\tx144\tx720\adjustright {\f11\cgrid0 \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab The run-length encoder, which is the first of the       compression transformat
ions, is entirely irrelevant.        The original purpose was to protect the sorting algorithm       from the very worst case input: a string of repeated       symbols.  But algorithm steps Q6a and Q6b in the original       Burrows-Wheeler technical repor
t (SRC-124) show how       repeats can be handled without difficulty in block       sorting.
\par 
\par \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab The randomisation mechanism doesn't really need to be       there.  Udi Manber and Gene Myers published a suffix       array construction algorithm a few years 
back, which       can be employed to sort any block, no matter how       repetitive, in O(N log N) time.  Subsequent work by       Kunihiko Sadakane has produced a derivative O(N (log N)^2)       algorithm which usually outperforms the Manber-Myers       
algorithm.
\par 
\par }\pard \li720\nowidctlpar\adjustright {\f11\cgrid0 I could have changed to Sadakane's algorithm, but I find       it to be slower than }{\f2\cgrid0 bzip2}{\f11\cgrid0 
's existing algorithm for       most inputs, and the randomisation mechanism protects       adequately against bad cases.  I didn't think it was       a goo
d tradeoff to make.  Partly this is due to the fact       that I was not flooded with email complaints about       }{\f2\cgrid0 bzip2-0.1}{\f11\cgrid0 's performance on repetitive data, so       perhaps it isn't a problem for real inputs.
\par 
\par Probably the best long-term solution       
is to use the existing sorting       algorithm initially, and fall back to a O(N (log N)^2)       algorithm if the standard algorithm gets into difficulties.        This can be done without much difficulty; I made       a prototype implementation of it so
me months ago.
\par 
\par }\pard \fi-720\li720\nowidctlpar\tx144\tx720\adjustright {\f11\cgrid0 \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab 
The compressed file format was never designed to be       handled by a library, and I have had to jump through       some hoops to produce an efficient implementation of       decompression.  It's a bit hairy.  Try passing       }{\f2\cgrid0 decompress.c}{
\f11\cgrid0  through the C preprocessor       and you'll see what I mean.  Much of this complexity       could have been avoided if the compressed size of       each block of data was recorded in the data stream.
\par 
\par \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab An Adler-32 checksum, rather than a CRC32 checksum,       would be faster to compute.
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 It would be fair to say that the }{\f2\cgrid0 bzip2}{\f11\cgrid0  format was frozen before I properly and fully understood the performance consequences of doing so.
\par 
\par Improvements which I have been able to incorporate into 0.9.0, despite using the same file format, are: 
\par }\pard \fi-720\li720\nowidctlpar\tx144\tx720\adjustright {\f11\cgrid0 \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab 
Single array implementation of the inverse BWT.  This       significantly speeds up decompression, presumably       because it reduces the number of cache misses.
\par 
\par \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab Faster inverse MTF transform for large MTF values.  The       new implementation is based on the notion of sliding blocks       of values.
\par 
\par \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab }{\f2\cgrid0 bzip2-0.9.0}{\f11\cgrid0  now reads and writes files with }{\f2\cgrid0 fread}{\f11\cgrid0        and }{\f2\cgrid0 fwrite}{\f11\cgrid0 ; version 0.1 used }{\f2\cgrid0 putc}{\f11\cgrid0  and }{
\f2\cgrid0 getc}{\f11\cgrid0 .        Duh! I'm embarrassed at my own moronicness (moronicity?) on this       one.
\par 
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Further ahead, it would be nice to be able to do random access into files.  This will require some careful design of compressed file formats.
\par 
\par }{\b\f11\cgrid0 Portability issues}{\f11\cgrid0 
\par 
\par After some consideration, I have decided not to use GNU }{\f2\cgrid0 autoconf}{\f11\cgrid0  to configure 0.9.0.
\par 
\par }{\f2\cgrid0 autoconf}{\f11\cgrid0 , admirable and wonderful though it is, mainly assists with portability problems between Unix-like platforms.  But }{\f2\cgrid0 bzip2}{\f11\cgrid0  doesn't have much in the way of portability problems on Unix; m
ost of the difficulties appear when porting to the Mac, or to Microsoft's operating systems.  }{\f2\cgrid0 autoconf}{\f11\cgrid0  doesn't help in those cases, and brings in a whole load of new complexity.
\par 
\par Most people should be able to compile the library and program under Unix straight out-of-the-box, so to speak, especially if you have a version of GNU C available.
\par 
\par There are a couple of }{\f2\cgrid0 __inline__}{\f11\cgrid0  directives in the code.  GNU C (}{\f2\cgrid0 gcc}{\f11\cgrid0 ) should be able to handle them.  If your compiler doesn't like them, just }{\f2\cgrid0 #define}{\f11\cgrid0  }{\f2\cgrid0 __inline__
}{\f11\cgrid0  to be null.  One easy way to do this is to compile with the flag }{\f2\cgrid0 -D__inline__=}{\f11\cgrid0 , which should be understood by most Unix compilers.
\par 
\par If you still have difficulties, try compiling with the macro }{\f2\cgrid0 BZ_STRICT_ANSI}{\f11\cgrid0 
 defined.  This should enable you to build the library in a strictly ANSI compliant environment.  Building the program itself like this is dangerous and not supported, since you remove }{\f2\cgrid0 bzip2}{\f11\cgrid0 
's checks against compressing directories, symbolic links, devices, and other not-really-a-file entities.  This could cause filesystem corruption!
\par 
\par One other thing: if you create a }{\f2\cgrid0 bzip2}{\f11\cgrid0  binary for public distribution, please try and link it statically (}{\f2\cgrid0 gcc -static}{\f11\cgrid0 ).  This avoids all sorts of library-version issues that others may encounter later on.

\par 
\par }{\b\f11\cgrid0 Reporting bugs}{\f11\cgrid0 
\par 
\par I tried pretty hard to make sure }{\f2\cgrid0 bzip2}{\f11\cgrid0  is bug free, both by design and by testing.  Hopefully you'll never need to read this section for real.
\par 
\par Nevertheless, if }{\f2\cgrid0 bzip2}{\f11\cgrid0  dies with a segmentation fault, a bus error or
 an internal assertion failure, it will ask you to email me a bug report.  Experience with version 0.1 shows that almost all these problems can be traced to either compiler bugs or hardware problems.  
\par }\pard \fi-720\li720\nowidctlpar\tx144\tx720\adjustright {\f11\cgrid0 \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab 
Recompile the program with no optimisation, and see if it works.  And/or try a different compiler.  I heard all sorts of stories about various flavours of GNU C (and other compilers) generating bad code for }{\f2\cgrid0 bzip2}{\f11\cgrid0 
, and I've run across two such examples myself.
\par 
\par }\pard \li720\nowidctlpar\adjustright {\f11\cgrid0 2.7.X versions of GNU C are known to generate bad code from time to time, at high optimisation levels.  If you get problems, try using the flags }{\f2\cgrid0 -O2}{\f11\cgrid0  }{\f2\cgrid0 
-fomit-frame-pointer}{\f11\cgrid0  }{\f2\cgrid0 -fno-strength-reduce}{\f11\cgrid0 .  You should specifically }{\i\f11\cgrid0 not}{\f11\cgrid0  use }{\f2\cgrid0 -funroll-loops}{\f11\cgrid0 .
\par 
\par You may notice that the Makefile runs four tests as part of the build process.  If the program passes all of these, it's a pretty good (but not 100%) indication that the compiler has done its job correctly.
\par 
\par }\pard \fi-720\li720\nowidctlpar\tx144\tx720\adjustright {\f11\cgrid0 \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab If }{\f2\cgrid0 bzip2}{\f11\cgrid0  crashes randomly, and the crashes are not repeatable, you may have a flaky memory subsystem.  }{\f2\cgrid0 
bzip2}{\f11\cgrid0  really hammers your memory hierarchy, and if it's a bit marginal, you may get these problems.  Ditto if your disk or I/O subsystem is slowly failing.  Yup, this really does happen.
\par 
\par }\pard \li720\nowidctlpar\adjustright {\f11\cgrid0 Try using a different machine of the same type, and see if you can repeat the problem.
\par 
\par }\pard \fi-720\li720\nowidctlpar\tx144\tx720\adjustright {\f11\cgrid0 \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab This isn't really a bug, but ... If }{\f2\cgrid0 bzip2}{\f11\cgrid0 
 tells you your file is corrupted on decompression, and you obtained the file via FTP, there is a possibility that you forgot to tell FTP to do
 a binary mode transfer.  That absolutely will cause the file to be non-decompressible.  You'll have to transfer it again.
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par If you've incorporated }{\f2\cgrid0 libbzip2}{\f11\cgrid0  into your own program and are getting problems, please, please, please, check that the parameters yo
u are passing in calls to the library are correct, and in accordance with what the documentation says is allowable.  I have tried to make the library robust against such problems, but I'm sure I haven't succeeded.
\par 
\par Finally, if the above comments don't help, you'll have to send me a bug report.  Now, it's just amazing how many people will send me a bug report saying something like 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0    bzip2 crashed with segmentation fault on my machine
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 and absolutely nothing else.  Needless to say, such a report is }{\i\f11\cgrid0 totally, utterly, completely and comprehensively 100% useless; a waste of your time, my time, and net bandwidth}{\f11\cgrid0 
.  With no details at all, there's no way I can possibly begin to figure out what the problem is.
\par 
\par The rules of the game are: facts, facts, facts.  Don't omit them because "oh, they won't be relevant".  At the bare minimum: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0    Machine type.  Operating system version.\line    Exact version of bzip2 (do bzip2 -V).\line    Exact version of the compiler used.\line    Flags passed to the compiler.
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 However, the most importa
nt single thing that will help me is the file that you were trying to compress or decompress at the time the problem happened.  Without that, my ability to do anything more than speculate about the cause is limited.
\par 
\par Please remember that I connect to the Internet with a modem, so you should contact me before mailing me huge files.
\par 
\par }{\b\f11\cgrid0 Did you get the right package?}{\f11\cgrid0 
\par 
\par }{\f2\cgrid0 bzip2}{\f11\cgrid0 
 is a resource hog.  It soaks up large amounts of CPU cycles and memory.  Also, it gives very large latencies.  In the worst case, you can feed many megabytes of uncompressed data into the library before getting any compressed output, so this probably rul
es out applications requiring interactive behaviour.
\par 
\par These aren't faults of my implementation, I hope, but more an intrinsic property of the Burrows-Wheeler transform (unfortunately).  Maybe this isn't what you want.
\par 
\par If you want a compressor and/or library which is faster, uses less memory but gets pretty good compression, and has minimal latency, consider Jean-loup Gailly's and Mark Adler's work, }{\f2\cgrid0 zlib-1.1.2}{\f11\cgrid0  and }{\f2\cgrid0 gzip-1.2.4}{
\f11\cgrid0 .  Look for them at }{\f2\cgrid0 http://www.cdrom.com/pub/infozip/zlib}{\f11\cgrid0  and }{\f2\cgrid0 http://www.gzip.org}{\f11\cgrid0  respectively.
\par 
\par For something faster and lighter still, you might try Markus F X J Oberhumer's }{\f2\cgrid0 LZO}{\f11\cgrid0  real-time compression/decompression library, at
\par }{\f2\cgrid0 http://wildsau.idv.uni-linz.ac.at/mfx/lzo.html}{\f11\cgrid0 .
\par 
\par If you want to use the }{\f2\cgrid0 bzip2}{\f11\cgrid0  algorithms to compress small blocks
 of data, 64k bytes or smaller, for example on an on-the-fly disk compressor, you'd be well advised not to use this library.  Instead, I've made a special library tuned for that kind of use.  It's part of }{\f2\cgrid0 e2compr-0.40}{\f11\cgrid0 
, an on-the-fly disk compressor for the Linux }{\f2\cgrid0 ext2}{\f11\cgrid0  filesystem.  Look at }{\f2\cgrid0 http://www.netspace.net.au/~reiter/e2compr}{\f11\cgrid0 .
\par 
\par }{\b\f11\cgrid0 Testing}{\f11\cgrid0 
\par 
\par A record of the tests I've done.
\par 
\par First, some data sets: 
\par }\pard \fi-720\li720\nowidctlpar\tx144\tx720\adjustright {\f11\cgrid0 \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab B: a directory containing 6001 files, one for every length in the       range 0 to 6000 bytes.  The files contain random lowercase       letters.  18.7 megabytes.
\par 
\par \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab H: my home directory tree.  Documents, source code, mail files,       compressed data.  H contains B, and also a directory of       files designed as boundary cases for the sorting; mostly very  
     repetitive, nasty files.  445 megabytes.
\par 
\par \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab A: directory tree holding various applications built from source:       }{\f2\cgrid0 egcs-1.0.2}{\f11\cgrid0 , }{\f2\cgrid0 gcc-2.8.1}{\f11\cgrid0 , KDE Beta 4, GTK, Octave, etc.        827 megabytes.
\par 
\par \tab }{\f3\cgrid0 \'b7}{\f11\cgrid0 \tab P: directory tree holding large amounts of source code (}{\f2\cgrid0 .tar}{\f11\cgrid0        files) of the entire GNU distribution, plus a couple of       Linux distributions.  2400 megabytes.
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 The tests conducted are as follows.  Each test means compressing (a copy of) each file in the data set, decompressing it and comparing it against the original.
\par 
\par First, a bunch of tests with block sizes, internal buffer sizes and randomisation lengths set very small, to detect any problems with the blocking, buffering and randomisation mechanis
ms.  This required modifying the source code so as to try to break it.  
\par }\pard \fi-720\li720\nowidctlpar\tx144\tx720\adjustright {\f11\cgrid0 \tab 1. \tab Data set H, with       buffer size of 1 byte, and block size of 23 bytes.
\par 
\par \tab 2. \tab Data set B, buffer sizes 1 byte, block size 1 byte.
\par 
\par \tab 3. \tab As (2) but small-mode decompression (first 1700 files).
\par 
\par \tab 4. \tab As (2) with block size 2 bytes.
\par 
\par \tab 5. \tab As (2) with block size 3 bytes.
\par 
\par \tab 6. \tab As (2) with block size 4 bytes.
\par 
\par \tab 7. \tab As (2) with block size 5 bytes.
\par 
\par \tab 8. \tab As (2) with block size 6 bytes and small-mode decompression.
\par 
\par \tab 9. \tab H with normal buffer sizes (5000 bytes), normal block       size (up to 900000 bytes), but with randomisation       mechanism running intensely (randomising approximately every       third byte).
\par 
\par \tab 10. \tab As (9) with small-mode decompression.
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Then some tests with unmodified source code.  
\par }\pard \fi-720\li720\nowidctlpar\tx144\tx720\adjustright {\f11\cgrid0 \tab 1. \tab H, all settings normal.
\par 
\par \tab 2. \tab As (1), with small-mode decompress.
\par 
\par \tab 3. \tab H, compress with flag }{\f2\cgrid0 -1}{\f11\cgrid0 .
\par 
\par \tab 4. \tab H, compress with flag }{\f2\cgrid0 -s}{\f11\cgrid0 , decompress with flag }{\f2\cgrid0 -s}{\f11\cgrid0 .
\par 
\par \tab 5. \tab Forwards compatibility: H, }{\f2\cgrid0 bzip2-0.1pl2}{\f11\cgrid0  compressing,       }{\f2\cgrid0 bzip2-0.9.0}{\f11\cgrid0  decompressing, all settings normal.
\par 
\par \tab 6. \tab Backwards compatibility:  H, }{\f2\cgrid0 bzip2-0.9.0}{\f11\cgrid0  compressing,       }{\f2\cgrid0 bzip2-0.1pl2}{\f11\cgrid0  decompressing, all settings normal.
\par 
\par \tab 7. \tab Bigger tests: A, all settings normal.
\par 
\par \tab 8. \tab P, all settings normal.
\par 
\par \tab 9. \tab Misc test: about 100 megabytes of }{\f2\cgrid0 .tar}{\f11\cgrid0  files with       }{\f2\cgrid0 bzip2}{\f11\cgrid0  compiled with Purify.
\par 
\par \tab 10. \tab Misc tests to make sure it builds and runs ok on non-Linux/x86       platforms.
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 These tests were conducted on a 205 MHz Cyrix 6x86MX machine, running Linux 2.0.32.  They represent nearly a week of continuous computation.  All tests completed successfully.
\par 
\par }{\b\f11\cgrid0 Further reading}{\f11\cgrid0 
\par 
\par }{\f2\cgrid0 bzip2}{\f11\cgrid0  is not research work, in the sense that it doesn't present any new ideas.  Rather, it's an engineering exercise based on existing ideas.
\par 
\par Four documents describe essentially all the ideas behind }{\f2\cgrid0 bzip2}{\f11\cgrid0 : 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0 Michael Burrows and D. J. Wheeler:\line   "A block-sorting lossless data compression algorithm"\line    10th May 1994.\line    Digital SRC Research Report 124.\line    ftp://ftp.digital.com/pub/D
EC/SRC/research-reports/SRC-124.ps.gz\line    If you have trouble finding it, try searching at the\line    New Zealand Digital Library, http://www.nzdl.org.\line \line Daniel S. Hirschberg and Debra A. Lelewer\line   "Efficient Decoding of Prefix Codes"
\line    Communications of the ACM, April 1990, Vol 33, Number 4.\line    You might be able to get an electronic copy of this\line       from the ACM Digital Library.\line \line David J. Wheeler\line    Program bred3.c and accompanying document bred3.ps.
\line    This contains the idea behind the multi-table Huffman\line    coding scheme.\line    ftp://ftp.cl.cam.ac.uk/pub/user/djw3/\line \line Jon L. Bentley and Robert Sedgewick\line   "Fast Algorithms for Sorting and Searching Strings"\line 
   Available from Sedgewick's web page,\line    www.cs.princeton.edu/~rs
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 The following paper gives valuable additional insights into the algorithm, but is not immediately the basis of any code used in bzip2.  
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0 Peter Fenwick:\line    Block Sorting Text Compression\line    Proceedings of the 19th Australasian Computer Science Conference,\line      Melbourne, Australia.  Jan 31 - Feb 2, 1996.\line 
   ftp://ftp.cs.auckland.ac.nz/pub/peter-f/ACSC96paper.ps
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 Kunihiko Sadakane's sorting algorithm, mentioned above, is available from: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0 http://naomi.is.s.u-tokyo.ac.jp/~sada/papers/Sada98b.ps.gz
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 The Manber-Myers suffix array construction algorithm is described in a paper available from: 
\par }\pard \li720\keep\nowidctlpar\adjustright {\f2\cgrid0 http://www.cs.arizona.edu/people/gene/PAPERS/suffix.ps
\par }\pard \nowidctlpar\adjustright {\f11\cgrid0 
\par \page }{\cs18\super K{\footnote\ftnalt \pard\plain \nowidctlpar\adjustright \fs20\lang1031\cgrid {\cs18\super M}{\f11\fs24\cgrid0 akertf}}#{\footnote\ftnalt \pard\plain \nowidctlpar\adjustright \fs20\lang1031\cgrid {\cs18\super h}{\f11\fs24\cgrid0 
cMakertf}}${\footnote\ftnalt \pard\plain \nowidctlpar\adjustright \fs20\lang1031\cgrid {\cs18\super A}{\f11\fs24\cgrid0 bout Makertf}}}{\b\f11\fs24\cgrid0 About Makertf}{\f11\cgrid0 
\par 
\par Makertf is a program that converts "Texinfo" files into "Rich Text Format" (RTF) files. It can be used to make WinHelp Files from GNU manuals and other documentation written in Texinfo. Visit http://www.snafu.de/~cschenk/makertf for more information.}{

\par }}